-rw-r--r--  .gitignore | 1
-rw-r--r--  .gitignore.license | 4
-rw-r--r--  1.0/HalPolicy.cpp | 59
-rw-r--r--  1.0/HalPolicy.hpp | 11
-rw-r--r--  1.1/HalPolicy.cpp | 21
-rw-r--r--  1.1/HalPolicy.hpp | 9
-rw-r--r--  1.2/ArmnnDriver.hpp | 44
-rw-r--r--  1.2/ArmnnDriverImpl.cpp | 450
-rw-r--r--  1.2/ArmnnDriverImpl.hpp | 30
-rw-r--r--  1.2/HalPolicy.cpp | 145
-rw-r--r--  1.2/HalPolicy.hpp | 35
-rw-r--r--  1.3/ArmnnDriver.hpp | 90
-rw-r--r--  1.3/ArmnnDriverImpl.cpp | 452
-rw-r--r--  1.3/ArmnnDriverImpl.hpp | 29
-rw-r--r--  1.3/HalPolicy.cpp | 116
-rw-r--r--  1.3/HalPolicy.hpp | 29
-rw-r--r--  Android.bp | 58
-rw-r--r--  Android.mk | 161
-rw-r--r--  ArmnnDevice.cpp | 14
-rw-r--r--  ArmnnDriverImpl.cpp | 73
-rw-r--r--  ArmnnPreparedModel.cpp | 212
-rw-r--r--  ArmnnPreparedModel.hpp | 63
-rw-r--r--  ArmnnPreparedModel_1_2.cpp | 281
-rw-r--r--  ArmnnPreparedModel_1_2.hpp | 77
-rw-r--r--  ArmnnPreparedModel_1_3.cpp | 355
-rw-r--r--  ArmnnPreparedModel_1_3.hpp | 85
-rw-r--r--  CacheDataHandler.cpp | 66
-rw-r--r--  CacheDataHandler.hpp | 68
-rw-r--r--  ConversionUtils.cpp | 44
-rw-r--r--  ConversionUtils.hpp | 750
-rw-r--r--  ConversionUtils_1_2.hpp | 1249
-rw-r--r--  ConversionUtils_1_3.hpp | 30
-rw-r--r--  DriverOptions.cpp | 34
-rw-r--r--  DriverOptions.hpp | 8
-rw-r--r--  LICENSE.spdx | 756
-rw-r--r--  LICENSES/MIT.txt | 9
-rw-r--r--  ModelToINetworkConverter.cpp | 18
-rw-r--r--  NnapiSupport.txt | 98
-rw-r--r--  NnapiSupport.txt.license | 4
-rw-r--r--  README.md.license | 4
-rw-r--r--  RequestThread.cpp | 7
-rw-r--r--  RequestThread_1_3.cpp | 10
-rw-r--r--  SECURITY.md.license | 4
-rw-r--r--  Utils.cpp | 379
-rw-r--r--  Utils.hpp | 38
-rw-r--r--  android.hardware.neuralnetworks@1.0-service-armnn.rc.license | 4
-rw-r--r--  android.hardware.neuralnetworks@1.1-service-armnn.rc.license | 4
-rw-r--r--  android.hardware.neuralnetworks@1.2-service-armnn.rc.license | 4
-rw-r--r--  android.hardware.neuralnetworks@1.3-service-armnn.rc.license | 4
-rw-r--r--  docs/FAQ.md | 37
-rw-r--r--  docs/FAQ.md.license | 4
-rw-r--r--  docs/IntegratorGuide.md | 135
-rw-r--r--  docs/IntegratorGuide.md.license | 4
-rwxr-xr-x  setup.sh | 67
-rw-r--r--  test/1.0/Convolution2D.cpp | 16
-rw-r--r--  test/1.0/FullyConnectedReshape.cpp | 37
-rw-r--r--  test/1.0/Lstm.cpp | 64
-rw-r--r--  test/1.1/Convolution2D.cpp | 21
-rw-r--r--  test/1.1/Lstm.cpp | 64
-rw-r--r--  test/1.1/Mean.cpp | 207
-rw-r--r--  test/1.1/Transpose.cpp | 116
-rw-r--r--  test/1.2/Capabilities.cpp | 35
-rw-r--r--  test/1.2/Dilation.cpp | 27
-rw-r--r--  test/1.2/Lstm.cpp | 83
-rw-r--r--  test/1.2/Mean.cpp | 204
-rw-r--r--  test/1.2/UnidirectionalSequenceLstm.cpp | 40
-rw-r--r--  test/1.3/QLstm.cpp | 85
-rw-r--r--  test/1.3/QosTests.cpp | 33
-rw-r--r--  test/Android.mk | 115
-rw-r--r--  test/Concat.cpp | 687
-rw-r--r--  test/Concurrent.cpp | 26
-rw-r--r--  test/Convolution2D.hpp | 75
-rw-r--r--  test/Dilation.hpp | 71
-rw-r--r--  test/DriverTestHelpers.cpp | 27
-rw-r--r--  test/DriverTestHelpers.hpp | 18
-rw-r--r--  test/FullyConnected.cpp | 71
-rw-r--r--  test/GenericLayerTests.cpp | 60
-rw-r--r--  test/Lstm.hpp | 53
-rw-r--r--  test/SystemProperties.cpp | 27
-rw-r--r--  test/TestHalfTensor.cpp | 33
-rw-r--r--  test/TestHalfTensor.hpp | 38
-rw-r--r--  test/TestTensor.cpp | 5
-rw-r--r--  test/TestTensor.hpp | 10
-rw-r--r--  test/Tests.cpp | 34
-rw-r--r--  test/UnidirectionalSequenceLstm.hpp | 1419
-rw-r--r--  test/UtilsTests.cpp | 73
86 files changed, 8389 insertions, 2228 deletions
diff --git a/.gitignore b/.gitignore
index 18a48d24..fa0683db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
armnn
boost_1_64_0
clframework
+flatbuffers-1.12.0
prebuilt
.vscode/settings.json
.gitignore
diff --git a/.gitignore.license b/.gitignore.license
new file mode 100644
index 00000000..7964c7db
--- /dev/null
+++ b/.gitignore.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/1.0/HalPolicy.cpp b/1.0/HalPolicy.cpp
index 7e9e9efa..ce578181 100644
--- a/1.0/HalPolicy.cpp
+++ b/1.0/HalPolicy.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -20,7 +20,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
switch (operation.type)
{
case V1_0::OperationType::ADD:
- return ConvertAdd(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Add);
case V1_0::OperationType::AVERAGE_POOL_2D:
return ConvertAveragePool2d(operation, model, data);
case V1_0::OperationType::CONCATENATION:
@@ -50,7 +50,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_0::OperationType::MAX_POOL_2D:
return ConvertMaxPool2d(operation, model, data);
case V1_0::OperationType::MUL:
- return ConvertMul(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Mul);
case V1_0::OperationType::RELU:
return ConvertReLu(operation, model, data);
case V1_0::OperationType::RELU1:
@@ -73,12 +73,6 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
}
}
-bool HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_0::HalPolicy::ConvertAdd()");
- return ::ConvertAdd<hal_1_0::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_0::HalPolicy::ConvertAveragePool2d()");
@@ -115,6 +109,15 @@ bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model
return ::ConvertDequantize<hal_1_0::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation)
+{
+ ALOGV("hal_1_0::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_0::HalPolicy>(operation, model, data, binaryOperation);
+}
+
bool HalPolicy::ConvertFloor(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_0::HalPolicy::ConvertFloor()");
@@ -464,10 +467,12 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputStateInInfo,
cellStateInInfo,
@@ -484,6 +489,7 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
// Add the layer
armnn::IConnectableLayer* layer = data.m_Network->AddLstmLayer(desc, params, "Lstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
outputStateIn.Connect(layer->GetInputSlot(1));
@@ -513,12 +519,6 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model,
return ConvertPooling2d<hal_1_0::HalPolicy>(operation, __func__, armnn::PoolingAlgorithm::Max, model, data);
}
-bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_0::HalPolicy::ConvertMul()");
- return ::ConvertMul<hal_1_0::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_0::HalPolicy::ConvertReLu()");
@@ -566,10 +566,12 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSoftmaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
desc);
@@ -579,7 +581,11 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
}
armnn::IConnectableLayer* layer = data.m_Network->AddSoftmaxLayer(desc);
- assert(layer != nullptr);
+ layer->SetBackendId(setBackend);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SoftmaxLayer", __func__);
+ }
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<hal_1_0::HalPolicy>(operation, 0, *layer, model, data);
@@ -604,13 +610,12 @@ bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& mod
}
armnn::SpaceToDepthDescriptor desc;
- bool dataLayoutCheck;
GetInputScalar<hal_1_0::HalPolicy>(operation, 1, OperandType::INT32, desc.m_BlockSize, model, data);
if (desc.m_BlockSize <= 1)
{
- return Fail("%s: Block size must be at least 1 in all dimensions");
+ return Fail("%s: Block size must be at least 1 in all dimensions", __func__);
}
const Operand* output = GetOutputOperand<hal_1_0::HalPolicy>(operation, 0, model);
@@ -626,10 +631,12 @@ bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& mod
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSpaceToDepthSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -639,7 +646,11 @@ bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& mod
}
armnn::IConnectableLayer* const layer = data.m_Network->AddSpaceToDepthLayer(desc);
- assert(layer != nullptr);
+ layer->SetBackendId(setBackend);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SpaceToDepthLayer", __func__);
+ }
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<hal_1_0::HalPolicy>(operation, 0, *layer, model, data);
@@ -686,10 +697,12 @@ bool HalPolicy::ConvertResizeBilinear(const Operation& operation, const Model& m
desc.m_DataLayout = armnn::DataLayout::NHWC;
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsResizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -705,9 +718,11 @@ bool HalPolicy::ConvertResizeBilinear(const Operation& operation, const Model& m
}
armnn::IConnectableLayer* layer = data.m_Network->AddResizeLayer(desc);
-
- assert(layer != nullptr);
-
+ layer->SetBackendId(setBackend);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ResizeLayer", __func__);
+ }
layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
input.Connect(layer->GetInputSlot(0));
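The 1.0 policy change above replaces the per-operator entry points ConvertAdd and ConvertMul with a single ConvertElementwiseBinary that is parameterised by armnn::BinaryOperation. A minimal, self-contained sketch of that dispatch pattern follows; the types are simplified stand-ins for illustration only, not the actual NNAPI/Arm NN declarations:

    #include <cstdio>

    // Simplified stand-ins for the real HAL/Arm NN types (illustration only).
    enum class OperationType { ADD, MUL, SUB, DIV };
    enum class BinaryOperation { Add, Mul, Sub, Div };

    // One converter handles every elementwise binary operator; the HAL
    // operation type only selects which BinaryOperation is passed through.
    bool ConvertElementwiseBinary(BinaryOperation op)
    {
        std::printf("ConvertElementwiseBinary(%d)\n", static_cast<int>(op));
        return true; // the real converter validates inputs and adds the layer here
    }

    bool ConvertOperation(OperationType type)
    {
        switch (type)
        {
            case OperationType::ADD: return ConvertElementwiseBinary(BinaryOperation::Add);
            case OperationType::MUL: return ConvertElementwiseBinary(BinaryOperation::Mul);
            case OperationType::SUB: return ConvertElementwiseBinary(BinaryOperation::Sub);
            case OperationType::DIV: return ConvertElementwiseBinary(BinaryOperation::Div);
        }
        return false;
    }

    int main() { return ConvertOperation(OperationType::ADD) ? 0 : 1; }

The same consolidation is applied to the 1.1 and 1.2 policies further down (DIV, SUB, MAXIMUM, MINIMUM and MUL all route through the shared converter).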
diff --git a/1.0/HalPolicy.hpp b/1.0/HalPolicy.hpp
index 25bc47ce..5d92f0d6 100644
--- a/1.0/HalPolicy.hpp
+++ b/1.0/HalPolicy.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -31,8 +31,6 @@ public:
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data);
@@ -45,6 +43,11 @@ private:
static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
+
static bool ConvertFloor(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertFullyConnected(const Operation& operation, const Model& model, ConversionData& data);
@@ -63,8 +66,6 @@ private:
static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertReLu(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data);
diff --git a/1.1/HalPolicy.cpp b/1.1/HalPolicy.cpp
index 53a884ca..cd59cd6b 100644
--- a/1.1/HalPolicy.cpp
+++ b/1.1/HalPolicy.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2019,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -80,9 +80,9 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
switch (operation.type)
{
case V1_1::OperationType::DIV:
- return ConvertDiv(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Div);
case V1_1::OperationType::SUB:
- return ConvertSub(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Sub);
case V1_1::OperationType::MEAN:
return ConvertMean(operation, model, data);
case V1_1::OperationType::PAD:
@@ -104,16 +104,13 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
}
}
-bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data)
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation)
{
- ALOGV("hal_1_1::HalPolicy::ConvertDiv()");
- return ::ConvertDiv<hal_1_1::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_1::HalPolicy::ConvertSub()");
- return ::ConvertSub<hal_1_1::HalPolicy>(operation, model, data);
+ ALOGV("hal_1_1::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_1::HalPolicy>(operation, model, data, binaryOperation);
}
bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data)
diff --git a/1.1/HalPolicy.hpp b/1.1/HalPolicy.hpp
index 18bb705c..e1feb830 100644
--- a/1.1/HalPolicy.hpp
+++ b/1.1/HalPolicy.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -31,8 +31,11 @@ public:
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
+
static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertSpaceToBatchNd(const Operation& operation, const Model& model, ConversionData& data);
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index 5227272f..c855b527 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -19,6 +19,8 @@
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
+#include <armnn/BackendHelper.hpp>
+
#include <log/log.h>
namespace armnn_driver
@@ -129,26 +131,32 @@ public:
Return<void> getType(getType_cb cb)
{
ALOGV("hal_1_2::ArmnnDriver::getType()");
-
- cb(V1_0::ErrorStatus::NONE, V1_2::DeviceType::CPU);
+ const auto device_type = hal_1_2::HalPolicy::GetDeviceTypeFromOptions(this->m_Options);
+ cb(V1_0::ErrorStatus::NONE, device_type);
return Void();
}
Return<V1_0::ErrorStatus> prepareModelFromCache(
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_2::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
- Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& cb)
+ Return<V1_0::ErrorStatus> prepareModel_1_2(
+ const V1_2::Model& model, V1_1::ExecutionPreference preference,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
@@ -165,6 +173,9 @@ public:
m_ClTunedParameters,
m_Options,
model,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled());
@@ -198,9 +209,12 @@ public:
Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
{
ALOGV("hal_1_2::ArmnnDriver::getSupportedExtensions()");
-
- // Set both numbers to be 0 for cache not supported.
- cb(V1_0::ErrorStatus::NONE, 0, 0);
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : m_Options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
return Void();
}
};
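With this change getNumberOfCacheFilesNeeded() no longer reports hard-coded zeros: the model-cache count is summed per configured backend via GetNumberOfCacheFiles(), and exactly one data-cache slot is reported for the serialized Arm NN network. A rough standalone sketch of that accounting, with GetNumberOfCacheFiles() stubbed out (the 1.2 implementation's comments indicate GpuAcc contributes one file), is:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for armnn::BackendId / GetNumberOfCacheFiles();
    // here only "GpuAcc" reports a cache file (the compiled CL network).
    unsigned int GetNumberOfCacheFiles(const std::string& backend)
    {
        return backend == "GpuAcc" ? 1u : 0u;
    }

    int main()
    {
        std::vector<std::string> backends = { "GpuAcc", "CpuAcc" };

        unsigned int numberOfCachedModelFiles = 0;
        for (const auto& backend : backends)
        {
            numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
        }

        // The driver reports the summed model-cache count plus exactly one
        // data-cache file (the serialized Arm NN network).
        std::printf("model cache files: %u, data cache files: %u\n",
                    numberOfCachedModelFiles, 1u);
        return 0;
    }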
diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp
index ccf82d0e..f0a426fa 100644
--- a/1.2/ArmnnDriverImpl.cpp
+++ b/1.2/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,7 +8,11 @@
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
#include <log/log.h>
+#include <sys/stat.h>
+#include <chrono>
namespace
{
@@ -90,11 +94,16 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const V1_2::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
const android::sp<V1_2::IPreparedModelCallback>& cb,
bool float32ToFloat16)
{
ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");
+ std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
if (cb.get() == nullptr)
{
ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -127,20 +136,55 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
auto serializedNetworkFileName =
- SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
- armnn::OptimizerOptions OptOptions;
- OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ if (modelCacheHandle[index]->numFds == 1)
+ {
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = modelCacheHandle[index]->data[0];
+ saveCachedNetwork = true;
+ }
+ }
+ index += numberOfCachedModelFiles;
+ }
+ }
+ }
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
- { "SaveCachedNetwork", options.SaveCachedNetwork() },
+ { "SaveCachedNetwork", saveCachedNetwork },
{ "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
- { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
});
armnn::BackendOptions cpuAcc("CpuAcc",
@@ -148,8 +192,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "NumberOfThreads", options.GetNumberOfThreads() }
});
- OptOptions.m_ModelOptions.push_back(gpuAcc);
- OptOptions.m_ModelOptions.push_back(cpuAcc);
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
@@ -188,11 +232,19 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
// Load it into the runtime.
armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
- if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
}
}
catch (std::exception& e)
@@ -216,32 +268,388 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
runtime.get(),
model,
options.GetRequestInputsAndOutputsDumpDir(),
- options.IsGpuProfilingEnabled()));
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ // Only run this if the GpuAcc backend has been added to options
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
}
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ size_t hashValue = 0;
+ // Cache the model
+ if (dataCacheHandle.size() > 0)
{
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ // Cache the Arm NN model, should be only 1
+ if (dataCacheHandle.size() != 1)
+ {
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
}
- catch (std::exception& error)
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode.");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
}
+
+ write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+ }
+
+ if (modelCacheHandle.size() > 0)
+ {
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+ {
+ if (modelCacheHandle[i]->numFds == 1)
+ {
+ int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector <uint8_t> modelData(modelDataSize);
+ pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
+ }
+ }
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
}
NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
+ return V1_0::ErrorStatus::NONE;
+}
+
+Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16)
+{
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache()");
+ std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel");
+ return V1_0::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!runtime)
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
+ }
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb);
+ return V1_0::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ // DataCacheHandle size should always be 1
+ // Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Check if model files cached they match the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1.");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, fd < 0");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (dataSize == 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int offset = 0;
+ {
+ struct stat statBuffer;
+ if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize != dataSize)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+ }
+ }
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+ int gpuAccCachedFd = -1;
+ bool saveCachedNetwork = false;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (modelCacheHandle[index]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1.");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE,
+ "Cannot read from the model cache, numFds != 1.", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+ auto cachedFd = modelCacheHandle[index]->data[0];
+
+ int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDWR)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ struct stat statBuffer;
+ if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize <= 0)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Wrong cached model size!", cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ // For GpuAcc numberOfCachedFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Deserialize the network..
+ armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+ try
+ {
+ network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from Deserializer.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"SaveCachedNetwork", saveCachedNetwork},
+ {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+ {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
+ {"CachedFileDescriptor", gpuAccCachedFd}
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"NumberOfThreads", options.GetNumberOfThreads()}
+ });
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from optimize.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ try
+ {
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
+ new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
+ netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled(),
+ true));
+
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache cache timing = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());
+
return V1_0::ErrorStatus::NONE;
}
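The caching flow added to prepareArmnnModel_1_2 and prepareModelFromCache ties the NNAPI cache token to a combined hash: the serialized network written to the data-cache fd is hashed, the hash of each backend model-cache blob is XORed in, and the result is registered so that prepareModelFromCache can validate the handles it receives before deserializing. A simplified, self-contained sketch of that bookkeeping follows; the real CacheDataHandler is the one added in CacheDataHandler.cpp/.hpp by this patch, and the names below are illustrative only:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    using Token = std::string;

    size_t HashBytes(const std::vector<uint8_t>& data)
    {
        return std::hash<std::string>{}(std::string(data.begin(), data.end()));
    }

    // Minimal stand-in for the driver's CacheDataHandler: maps a token to the
    // expected (hash, size) pair recorded when the model was first prepared.
    class CacheDataHandler
    {
    public:
        void Register(const Token& token, size_t hash, size_t size)
        {
            m_Entries[token] = { hash, size };
        }
        bool Validate(const Token& token, size_t hash, size_t size) const
        {
            auto it = m_Entries.find(token);
            return it != m_Entries.end()
                   && it->second.first == hash
                   && it->second.second == size;
        }
    private:
        std::map<Token, std::pair<size_t, size_t>> m_Entries;
    };

    int main()
    {
        std::vector<uint8_t> dataCache  = { 1, 2, 3 };  // serialized Arm NN network
        std::vector<uint8_t> modelCache = { 9, 8, 7 };  // e.g. compiled GpuAcc network

        size_t hashValue = HashBytes(dataCache);
        hashValue ^= HashBytes(modelCache);             // one XOR per model-cache file

        CacheDataHandler handler;
        handler.Register("token-0", hashValue, dataCache.size());  // prepareArmnnModel_1_2

        // prepareModelFromCache recomputes the hash from the cache fds and
        // rejects the request if it does not match the registered entry.
        std::printf("cache valid: %d\n",
                    handler.Validate("token-0", hashValue, dataCache.size()));
        return 0;
    }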
diff --git a/1.2/ArmnnDriverImpl.hpp b/1.2/ArmnnDriverImpl.hpp
index eeb491b6..70f46cba 100644
--- a/1.2/ArmnnDriverImpl.hpp
+++ b/1.2/ArmnnDriverImpl.hpp
@@ -7,10 +7,13 @@
#include <HalInterfaces.h>
+#include "../CacheDataHandler.hpp"
#include "../DriverOptions.hpp"
#include <armnn/ArmNN.hpp>
+#include <NeuralNetworks.h>
+
#ifdef ARMNN_ANDROID_R
using namespace android::nn::hal;
#endif
@@ -30,12 +33,27 @@ namespace hal_1_2
class ArmnnDriverImpl
{
public:
- static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime,
- const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
- const DriverOptions& options,
- const V1_2::Model& model,
- const android::sp<V1_2::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false);
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+ static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const V1_2::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false);
+
+ static Return<V1_0::ErrorStatus> prepareModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false);
static Return<void> getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
V1_2::IDevice::getCapabilities_1_2_cb cb);
diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index e96c4cb0..9c44003f 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -1,9 +1,10 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "HalPolicy.hpp"
+#include "DriverOptions.hpp"
namespace armnn_driver
{
@@ -17,6 +18,33 @@ namespace
} // anonymous namespace
+HalPolicy::DeviceType HalPolicy::GetDeviceTypeFromOptions(const DriverOptions& options)
+{
+ // Query backends list from the options
+ auto backends = options.GetBackends();
+ // Return first backend
+ if(backends.size()>0)
+ {
+ const auto &first_backend = backends[0];
+ if(first_backend.IsCpuAcc()||first_backend.IsCpuRef())
+ {
+ return V1_2::DeviceType::CPU;
+ }
+ else if(first_backend.IsGpuAcc())
+ {
+ return V1_2::DeviceType::GPU;
+ }
+ else
+ {
+ return V1_2::DeviceType::ACCELERATOR;
+ }
+ }
+ else
+ {
+ return V1_2::DeviceType::CPU;
+ }
+}
+
bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, ConversionData& data)
{
switch (operation.type)
@@ -24,7 +52,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_2::OperationType::ABS:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Abs);
case V1_2::OperationType::ADD:
- return ConvertAdd(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Add);
case V1_2::OperationType::ARGMAX:
return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Max);
case V1_2::OperationType::ARGMIN:
@@ -33,6 +61,10 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertAveragePool2d(operation, model, data);
case V1_2::OperationType::BATCH_TO_SPACE_ND:
return ConvertBatchToSpaceNd(operation, model, data);
+ case V1_2::OperationType::CAST:
+ return ConvertCast(operation, model, data);
+ case V1_2::OperationType::CHANNEL_SHUFFLE:
+ return ConvertChannelShuffle(operation, model, data);
case V1_2::OperationType::CONCATENATION:
return ConvertConcatenation(operation, model, data);
case V1_2::OperationType::CONV_2D:
@@ -44,7 +76,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_2::OperationType::DEQUANTIZE:
return ConvertDequantize(operation, model, data);
case V1_2::OperationType::DIV:
- return ConvertDiv(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Div);
case V1_2::OperationType::EQUAL:
return ConvertComparison(operation, model, data, ComparisonOperation::Equal);
case V1_2::OperationType::EXP:
@@ -75,6 +107,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertComparison(operation, model, data, ComparisonOperation::LessOrEqual);
case V1_2::OperationType::LOCAL_RESPONSE_NORMALIZATION:
return ConvertLocalResponseNormalization(operation, model, data);
+ case V1_2::OperationType::LOG:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Log);
case V1_2::OperationType::LOGISTIC:
return ConvertLogistic(operation, model, data);
case V1_2::OperationType::LOG_SOFTMAX:
@@ -84,13 +118,13 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_2::OperationType::MAX_POOL_2D:
return ConvertMaxPool2d(operation, model, data);
case V1_2::OperationType::MAXIMUM:
- return ConvertMaximum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Maximum);
case V1_2::OperationType::MEAN:
return ConvertMean(operation, model, data);
case V1_2::OperationType::MINIMUM:
- return ConvertMinimum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Minimum);
case V1_2::OperationType::MUL:
- return ConvertMul(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Mul);
case V1_2::OperationType::NEG:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Neg);
case V1_2::OperationType::NOT_EQUAL:
@@ -99,6 +133,10 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertPad(operation, model, data);
case V1_2::OperationType::PAD_V2:
return ConvertPadV2(operation, model, data);
+// There's a problem with the combination of Hal 1.2, Android Q and the POW operator. The problem does not happen
+// with Hal 1.3.
+// case V1_2::OperationType::POW:
+// return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Power);
case V1_2::OperationType::PRELU:
return ConvertPrelu(operation, model, data);
case V1_2::OperationType::QUANTIZE:
@@ -109,6 +147,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertReduce(operation, model, data, ReduceOperation::Max);
case V1_2::OperationType::REDUCE_MIN:
return ConvertReduce(operation, model, data, ReduceOperation::Min);
+ case V1_2::OperationType::REDUCE_PROD:
+ return ConvertReduce(operation, model, data, ReduceOperation::Prod);
case V1_2::OperationType::REDUCE_SUM:
return ConvertReduce(operation, model, data, ReduceOperation::Sum);
case V1_2::OperationType::RELU:
@@ -125,38 +165,40 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertResize(operation, model, data, ResizeMethod::NearestNeighbor);
case V1_2::OperationType::RSQRT:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Rsqrt);
+ case V1_2::OperationType::SIN:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Sin);
+ case V1_2::OperationType::SOFTMAX:
+ return ConvertSoftmax(operation, model, data);
+ case V1_2::OperationType::SPACE_TO_BATCH_ND :
+ return ConvertSpaceToBatchNd(operation, model, data);
+ case V1_2::OperationType::SPACE_TO_DEPTH:
+ return ConvertSpaceToDepth(operation, model, data);
+ case V1_2::OperationType::SPLIT:
+ return ConvertSplit(operation, model, data);
case V1_2::OperationType::SQRT:
return ConvertSqrt(operation, model, data);
case V1_2::OperationType::SQUEEZE:
return ConvertSqueeze(operation, model, data);
case V1_2::OperationType::STRIDED_SLICE:
return ConvertStridedSlice(operation, model, data);
+ case V1_2::OperationType::SUB:
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Sub);
case V1_2::OperationType::TRANSPOSE:
return ConvertTranspose(operation, model, data);
case V1_2::OperationType::TRANSPOSE_CONV_2D:
return ConvertTransposeConv2d(operation, model, data);
- case V1_2::OperationType::SOFTMAX:
- return ConvertSoftmax(operation, model, data);
- case V1_2::OperationType::SPACE_TO_BATCH_ND :
- return ConvertSpaceToBatchNd(operation, model, data);
- case V1_2::OperationType::SPACE_TO_DEPTH:
- return ConvertSpaceToDepth(operation, model, data);
- case V1_2::OperationType::SUB:
- return ConvertSub(operation, model, data);
case V1_2::OperationType::TANH:
return ConvertTanH(operation, model, data);
+ case V1_2::OperationType::TILE:
+ return ConvertTile(operation, model, data);
+ case V1_2::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM:
+ return ConvertUnidirectionalSequenceLstm(operation, model, data);
default:
return Fail("%s: Operation type %s not supported in ArmnnDriver",
__func__, toString(operation.type).c_str());
}
}
-bool HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertAdd()");
- return ::ConvertAdd<hal_1_2::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertArgMinMax(const V1_2::Operation& operation,
const V1_2::Model& model,
ConversionData& data,
@@ -178,6 +220,18 @@ bool HalPolicy::ConvertBatchToSpaceNd(const Operation& operation, const Model& m
return ::ConvertBatchToSpaceNd<hal_1_2::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertCast(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertCast()");
+ return ::ConvertCast<hal_1_2::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertChannelShuffle()");
+ return ::ConvertChannelShuffle<hal_1_2::HalPolicy>(operation, model, data);
+}
+
bool HalPolicy::ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -217,10 +271,13 @@ bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model
return ::ConvertDequantize_1_2<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data)
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ BinaryOperation binaryOperation)
{
- ALOGV("hal_1_2::HalPolicy::ConvertDiv()");
- return ::ConvertDiv<hal_1_2::HalPolicy>(operation, model, data);
+ ALOGV("hal_1_2::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_2::HalPolicy>(operation, model, data, binaryOperation);
}
bool HalPolicy::ConvertElementwiseUnary(const Operation& operation,
@@ -306,30 +363,12 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model,
return ConvertPooling2d<hal_1_2::HalPolicy>(operation, __func__, PoolingAlgorithm::Max, model, data);
}
-bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertMaximum()");
- return ::ConvertMaximum<hal_1_2::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertMean()");
return ::ConvertMean<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertMinimum()");
- return ::ConvertMinimum<hal_1_2::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertMul()");
- return ::ConvertMul<hal_1_2::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertPad()");
@@ -420,12 +459,6 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
return ::ConvertSoftmax<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertSub()");
- return ::ConvertSub<hal_1_2::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertTanH()");
@@ -438,6 +471,12 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
return ::ConvertLstm<hal_1_2::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertSplit(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertSplit()");
+ return ::ConvertSplit<hal_1_2::HalPolicy>(operation, model, data);
+}
+
bool HalPolicy::ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertSqrt()");
@@ -471,5 +510,17 @@ bool HalPolicy::ConvertTransposeConv2d(const Operation& operation, const Model&
return ::ConvertTransposeConv2d<hal_1_2::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertTile(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertTile()");
+ return ::ConvertTile<hal_1_2::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertUnidirectionalSequenceLstm(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertUnidirectionalSequenceLstm()");
+ return ::ConvertUnidirectionalSequenceLstm<hal_1_2::HalPolicy>(operation, model, data);
+}
+
} // namespace hal_1_2
} // namespace armnn_driver
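getType() no longer hard-codes V1_2::DeviceType::CPU; it now calls GetDeviceTypeFromOptions(), which keys the reported device type off the first backend in the driver options. A condensed standalone illustration of that mapping, using string backend names as stand-ins for armnn::BackendId, is:

    #include <cstdio>
    #include <string>
    #include <vector>

    enum class DeviceType { CPU, GPU, ACCELERATOR };

    // The reported NNAPI device type follows the *first* configured backend;
    // an empty backend list falls back to CPU.
    DeviceType DeviceTypeFromBackends(const std::vector<std::string>& backends)
    {
        if (backends.empty())                                   return DeviceType::CPU;
        if (backends[0] == "CpuAcc" || backends[0] == "CpuRef") return DeviceType::CPU;
        if (backends[0] == "GpuAcc")                            return DeviceType::GPU;
        return DeviceType::ACCELERATOR;
    }

    int main()
    {
        // Prints 1 (GPU) for a GpuAcc-first configuration.
        std::printf("%d\n", static_cast<int>(DeviceTypeFromBackends({ "GpuAcc", "CpuAcc" })));
        return 0;
    }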
diff --git a/1.2/HalPolicy.hpp b/1.2/HalPolicy.hpp
index abd60e72..4d77dfe5 100644
--- a/1.2/HalPolicy.hpp
+++ b/1.2/HalPolicy.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -16,6 +16,7 @@ namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace armnn_driver
{
+class DriverOptions;
namespace hal_1_2
{
@@ -31,12 +32,13 @@ public:
using ExecutionCallback = V1_2::IExecutionCallback;
using getSupportedOperations_cb = V1_2::IDevice::getSupportedOperations_1_2_cb;
using ErrorStatus = V1_0::ErrorStatus;
+ using DeviceType = V1_2::DeviceType;
+
+ static DeviceType GetDeviceTypeFromOptions(const DriverOptions& options);
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertArgMinMax(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -46,6 +48,10 @@ private:
static bool ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertCast(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data);
+
static bool ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -61,10 +67,13 @@ private:
static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
+
static bool ConvertElementwiseUnary(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -96,14 +105,8 @@ private:
static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data);
-
- static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data);
@@ -138,19 +141,25 @@ private:
static bool ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertSplit(const Operation& operation, const Model& model, ConversionData& data);
+
static bool ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertSqueeze(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertTanH(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertTile(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertUnidirectionalSequenceLstm(const Operation& operation,
+ const Model& model,
+ ConversionData& data);
};
} // namespace hal_1_2
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp
index 451b5ab5..6d2e0b7a 100644
--- a/1.3/ArmnnDriver.hpp
+++ b/1.3/ArmnnDriver.hpp
@@ -21,6 +21,8 @@
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
+#include <armnn/BackendHelper.hpp>
+
#include <log/log.h>
namespace armnn_driver
@@ -31,6 +33,7 @@ namespace hal_1_3
class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice
{
public:
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
ArmnnDriver(DriverOptions options)
: ArmnnDevice(std::move(options))
@@ -39,9 +42,7 @@ public:
}
~ArmnnDriver() {}
- using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
-public:
Return<void> getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override
{
ALOGV("hal_1_3::ArmnnDriver::getCapabilities()");
@@ -131,10 +132,13 @@ public:
cb);
}
- Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& cb)
+ Return<V1_0::ErrorStatus> prepareModel_1_2(
+ const V1_2::Model& model,
+ V1_1::ExecutionPreference preference,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()");
@@ -151,6 +155,9 @@ public:
m_ClTunedParameters,
m_Options,
model,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled());
@@ -174,14 +181,15 @@ public:
cb);
}
- Return<V1_3::ErrorStatus> prepareModel_1_3(const V1_3::Model& model,
- V1_1::ExecutionPreference preference,
- V1_3::Priority priority,
- const V1_3::OptionalTimePoint&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_3::IPreparedModelCallback>& cb)
+ Return<V1_3::ErrorStatus> prepareModel_1_3(
+ const V1_3::Model& model,
+ V1_1::ExecutionPreference preference,
+ V1_3::Priority priority,
+ const V1_3::OptionalTimePoint&,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCache,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCache,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()");
@@ -199,11 +207,13 @@ public:
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
-
return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime,
m_ClTunedParameters,
m_Options,
model,
+ modelCache,
+ dataCache,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled(),
@@ -219,10 +229,13 @@ public:
Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
{
- ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()");
-
- // Set both numbers to be 0 for cache not supported.
- cb(V1_0::ErrorStatus::NONE, 0, 0);
+ ALOGV("hal_1_3::ArmnnDriver::getNumberOfCacheFilesNeeded()");
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : m_Options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
return Void();
}
@@ -244,32 +257,41 @@ public:
Return<void> getType(getType_cb cb)
{
ALOGV("hal_1_3::ArmnnDriver::getType()");
-
- cb(V1_0::ErrorStatus::NONE, V1_2::DeviceType::CPU);
+ const auto device_type = hal_1_2::HalPolicy::GetDeviceTypeFromOptions(this->m_Options);
+ cb(V1_0::ErrorStatus::NONE, device_type);
return Void();
}
Return<V1_0::ErrorStatus> prepareModelFromCache(
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return hal_1_2::ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
const V1_3::OptionalTimePoint&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_3::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
{
- ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
+ ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache_1_3()");
+
+ return ArmnnDriverImpl::prepareModelFromCache_1_3(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
Return<void> allocate(const V1_3::BufferDesc& /*desc*/,
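The getNumberOfCacheFilesNeeded() change above replaces the old stub, which reported (0, 0), with a real count: the number of model cache files is the sum of GetNumberOfCacheFiles() over the configured backends, and exactly one data cache file is used for the serialized Arm NN network. Below is a minimal standalone sketch of that counting shape; NumberOfCacheFilesFor() and the assumption that only GpuAcc contributes a cache file are illustrative stand-ins, not the driver's API.

    // Illustrative sketch of getNumberOfCacheFilesNeeded(): sum the per-backend
    // model cache files and report one data cache file for the serialized network.
    // NumberOfCacheFilesFor() is a hypothetical stand-in for GetNumberOfCacheFiles().
    #include <string>
    #include <utility>
    #include <vector>

    unsigned int NumberOfCacheFilesFor(const std::string& backend)
    {
        // Assumption for this example: only GpuAcc caches a compiled-program file.
        return backend == "GpuAcc" ? 1u : 0u;
    }

    // Returns {numModelCacheFiles, numDataCacheFiles}, mirroring the
    // cb(ErrorStatus::NONE, numberOfCachedModelFiles, 1ul) call above.
    std::pair<unsigned int, unsigned int>
    CacheFilesNeeded(const std::vector<std::string>& backends)
    {
        unsigned int modelFiles = 0;
        for (const auto& backend : backends)
        {
            modelFiles += NumberOfCacheFilesFor(backend);
        }
        return { modelFiles, 1u };
    }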
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index 6d8fbe64..ec176d59 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,8 +8,13 @@
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
#include <log/log.h>
+#include <sys/stat.h>
+#include <chrono>
+
namespace
{
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
@@ -100,12 +105,17 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const V1_3::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
const android::sp<V1_3::IPreparedModelCallback>& cb,
bool float32ToFloat16,
V1_3::Priority priority)
{
ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");
+ std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
if (cb.get() == nullptr)
{
ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -138,20 +148,56 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+    bool serializeToFile = dataCacheHandle.size() >= 1;
auto serializedNetworkFileName =
- SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
- armnn::OptimizerOptions OptOptions;
- OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+                // modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ if (modelCacheHandle[index]->numFds == 1)
+ {
+                    // For GpuAcc, numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = modelCacheHandle[index]->data[0];
+ saveCachedNetwork = true;
+ }
+ }
+ index += numberOfCachedModelFiles;
+ }
+ }
+ }
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", options.IsFastMathEnabled() },
- { "SaveCachedNetwork", options.SaveCachedNetwork() },
+ { "SaveCachedNetwork", saveCachedNetwork },
{ "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
- { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
});
armnn::BackendOptions cpuAcc("CpuAcc",
@@ -159,8 +205,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "NumberOfThreads", options.GetNumberOfThreads() }
});
- OptOptions.m_ModelOptions.push_back(gpuAcc);
- OptOptions.m_ModelOptions.push_back(cpuAcc);
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
@@ -199,9 +245,17 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
// Load it into the runtime.
armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
- if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
}
@@ -228,32 +282,390 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
model,
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
- priority));
+ priority,
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ // Only run this if the GpuAcc backend has been added to options
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
+ {
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+                    // Now that we've done one inference, the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
+ }
+ size_t hashValue = 0;
+ // Cache the model
+ if (dataCacheHandle.size() > 0)
+ {
+ // Cache the Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, invalid access mode (expected O_RDWR).");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+ }
+
+ // Cache the model data
+ if (modelCacheHandle.size() > 0)
+ {
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+ {
+ if (modelCacheHandle[i]->numFds == 1)
+ {
+ int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
+ }
+ }
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
+
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+
+    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3 timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
+
+ return V1_3::ErrorStatus::NONE;
+}
+
+Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
+{
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
+ std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
+ return V1_3::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!runtime)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
+ return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
+ }
+
+ // DataCacheHandle size should always be 1
+ // Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+    // Check that the number of cached model files matches the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
{
- return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
}
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (dataSize == 0)
{
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int offset = 0;
+ {
+ struct stat statBuffer;
+ if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize != dataSize)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+ }
+ }
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+ int gpuAccCachedFd = -1;
+ bool saveCachedNetwork = false;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (modelCacheHandle[index]->numFds != 1)
+ {
+ ALOGW(
+ "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+ auto cachedFd = modelCacheHandle[index]->data[0];
+
+ int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDWR)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ struct stat statBuffer;
+ if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize <= 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::NONE;
+ }
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+                    // For GpuAcc, numberOfCacheFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+    // Deserialize the network.
+ armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+ try
+ {
+ network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+ }
+ catch (std::exception&)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled());
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"SaveCachedNetwork", saveCachedNetwork},
+ {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+ {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
+ {"CachedFileDescriptor", gpuAccCachedFd}
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"NumberOfThreads", options.GetNumberOfThreads()}
+ });
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from optimize.";
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ message << "\n" << msg;
}
- catch (std::exception& error)
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ try
+ {
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
}
}
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
+ new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ V1_3::Priority::MEDIUM,
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled(),
+ true));
NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+    ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3 timing = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());
+
return V1_3::ErrorStatus::NONE;
}
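Both prepareArmnnModel_1_3() and prepareModelFromCache_1_3() above rely on the same integrity scheme: hash the serialized network (the data cache blob), XOR in the hash of every backend-supplied model cache blob, and register or validate that combined value against the NNAPI cache token via CacheDataHandlerInstance(). The sketch below shows only the hash-combining idea; HashBlob() using std::hash over a byte-string copy is an assumption for illustration, not the driver's CacheDataHandler implementation.

    // Sketch of the cache-integrity hashing used above. HashBlob() is an assumed
    // stand-in for CacheDataHandler::Hash(); the XOR fold matches the
    // 'hashValue ^= ...' lines in the diff.
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    size_t HashBlob(const std::vector<uint8_t>& blob)
    {
        return std::hash<std::string>{}(std::string(blob.begin(), blob.end()));
    }

    // Prepare time: combine the serialized-network hash with each model cache
    // blob hash; the result is registered against the cache token.
    size_t CombineCacheHashes(const std::vector<uint8_t>& dataCacheBlob,
                              const std::vector<std::vector<uint8_t>>& modelCacheBlobs)
    {
        size_t hashValue = HashBlob(dataCacheBlob);
        for (const auto& modelBlob : modelCacheBlobs)
        {
            hashValue ^= HashBlob(modelBlob);
        }
        return hashValue;
    }

    // prepareModelFromCache time: recompute the fold from the supplied cache file
    // contents and compare with the value registered for the token; a mismatch
    // means the cache is stale and preparation must fail.
    bool CacheStillValid(size_t registeredHash,
                         const std::vector<uint8_t>& dataCacheBlob,
                         const std::vector<std::vector<uint8_t>>& modelCacheBlobs)
    {
        return CombineCacheHashes(dataCacheBlob, modelCacheBlobs) == registeredHash;
    }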
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp
index 3c094fe5..a482edac 100644
--- a/1.3/ArmnnDriverImpl.hpp
+++ b/1.3/ArmnnDriverImpl.hpp
@@ -7,6 +7,7 @@
#include <HalInterfaces.h>
+#include "../CacheDataHandler.hpp"
#include "../DriverOptions.hpp"
#include <armnn/ArmNN.hpp>
@@ -31,13 +32,27 @@ namespace hal_1_3
class ArmnnDriverImpl
{
public:
- static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime,
- const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
- const DriverOptions& options,
- const V1_3::Model& model,
- const android::sp<V1_3::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false,
- V1_3::Priority priority = V1_3::Priority::MEDIUM);
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+ static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const V1_3::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM);
+
+ static Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb);
static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
V1_3::IDevice::getCapabilities_1_3_cb cb);
diff --git a/1.3/HalPolicy.cpp b/1.3/HalPolicy.cpp
index d58ac6c6..e5f295fd 100644
--- a/1.3/HalPolicy.cpp
+++ b/1.3/HalPolicy.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -24,7 +24,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_3::OperationType::ABS:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Abs);
case V1_3::OperationType::ADD:
- return ConvertAdd(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Add);
case V1_3::OperationType::ARGMAX:
return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Max);
case V1_3::OperationType::ARGMIN:
@@ -33,6 +33,10 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertAveragePool2d(operation, model, data);
case V1_3::OperationType::BATCH_TO_SPACE_ND:
return ConvertBatchToSpaceNd(operation, model, data);
+ case V1_3::OperationType::CAST:
+ return ConvertCast(operation, model, data);
+ case V1_3::OperationType::CHANNEL_SHUFFLE:
+ return ConvertChannelShuffle(operation, model, data);
case V1_3::OperationType::CONCATENATION:
return ConvertConcatenation(operation, model, data);
case V1_3::OperationType::CONV_2D:
@@ -44,7 +48,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_3::OperationType::DEQUANTIZE:
return ConvertDequantize(operation, model, data);
case V1_3::OperationType::DIV:
- return ConvertDiv(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Div);
case V1_3::OperationType::ELU:
return ConvertElu(operation, model, data);
case V1_3::OperationType::EQUAL:
@@ -81,6 +85,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertComparison(operation, model, data, ComparisonOperation::LessOrEqual);
case V1_3::OperationType::LOCAL_RESPONSE_NORMALIZATION:
return ConvertLocalResponseNormalization(operation, model, data);
+ case V1_3::OperationType::LOG:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Log);
case V1_3::OperationType::LOGICAL_AND:
return ConvertLogicalBinary(operation, model, data, LogicalBinaryOperation::LogicalAnd);
case V1_3::OperationType::LOGICAL_NOT:
@@ -96,13 +102,13 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_3::OperationType::MAX_POOL_2D:
return ConvertMaxPool2d(operation, model, data);
case V1_3::OperationType::MAXIMUM:
- return ConvertMaximum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Maximum);
case V1_3::OperationType::MEAN:
return ConvertMean(operation, model, data);
case V1_3::OperationType::MINIMUM:
- return ConvertMinimum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Minimum);
case V1_3::OperationType::MUL:
- return ConvertMul(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Mul);
case V1_3::OperationType::NEG:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Neg);
case V1_3::OperationType::NOT_EQUAL:
@@ -111,6 +117,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertPad(operation, model, data);
case V1_3::OperationType::PAD_V2:
return ConvertPadV2(operation, model, data);
+ case V1_3::OperationType::POW:
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Power);
case V1_3::OperationType::PRELU:
return ConvertPrelu(operation, model, data);
case V1_3::OperationType::QUANTIZE:
@@ -125,6 +133,8 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertReduce(operation, model, data, ReduceOperation::Max);
case V1_3::OperationType::REDUCE_MIN:
return ConvertReduce(operation, model, data, ReduceOperation::Min);
+ case V1_3::OperationType::REDUCE_PROD:
+ return ConvertReduce(operation, model, data, ReduceOperation::Prod);
case V1_3::OperationType::REDUCE_SUM:
return ConvertReduce(operation, model, data, ReduceOperation::Sum);
case V1_3::OperationType::RELU:
@@ -141,38 +151,40 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertResize(operation, model, data, ResizeMethod::NearestNeighbor);
case V1_3::OperationType::RSQRT:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Rsqrt);
+ case V1_3::OperationType::SIN:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Sin);
+ case V1_3::OperationType::SOFTMAX:
+ return ConvertSoftmax(operation, model, data);
+ case V1_3::OperationType::SPACE_TO_BATCH_ND :
+ return ConvertSpaceToBatchNd(operation, model, data);
+ case V1_3::OperationType::SPACE_TO_DEPTH:
+ return ConvertSpaceToDepth(operation, model, data);
+ case V1_3::OperationType::SPLIT:
+ return ConvertSplit(operation, model, data);
case V1_3::OperationType::SQRT:
return ConvertSqrt(operation, model, data);
case V1_3::OperationType::SQUEEZE:
return ConvertSqueeze(operation, model, data);
case V1_3::OperationType::STRIDED_SLICE:
return ConvertStridedSlice(operation, model, data);
+ case V1_3::OperationType::SUB:
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Sub);
case V1_3::OperationType::TRANSPOSE:
return ConvertTranspose(operation, model, data);
case V1_3::OperationType::TRANSPOSE_CONV_2D:
return ConvertTransposeConv2d(operation, model, data);
- case V1_3::OperationType::SOFTMAX:
- return ConvertSoftmax(operation, model, data);
- case V1_3::OperationType::SPACE_TO_BATCH_ND :
- return ConvertSpaceToBatchNd(operation, model, data);
- case V1_3::OperationType::SPACE_TO_DEPTH:
- return ConvertSpaceToDepth(operation, model, data);
- case V1_3::OperationType::SUB:
- return ConvertSub(operation, model, data);
case V1_3::OperationType::TANH:
return ConvertTanH(operation, model, data);
+ case V1_3::OperationType::TILE:
+ return ConvertTile(operation, model, data);
+ case V1_3::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM:
+ return ConvertUnidirectionalSequenceLstm(operation, model, data);
default:
return Fail("%s: Operation type %s not supported in ArmnnDriver",
__func__, toString(operation.type).c_str());
}
}
-bool HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertAdd()");
- return ::ConvertAdd<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertArgMinMax(const V1_3::Operation& operation,
const V1_3::Model& model,
ConversionData& data,
@@ -194,6 +206,18 @@ bool HalPolicy::ConvertBatchToSpaceNd(const Operation& operation, const Model& m
return ::ConvertBatchToSpaceNd<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertCast(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertCast()");
+ return ::ConvertCast<hal_1_3::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertChannelShuffle()");
+ return ::ConvertChannelShuffle<hal_1_3::HalPolicy>(operation, model, data);
+}
+
bool HalPolicy::ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -203,7 +227,6 @@ bool HalPolicy::ConvertComparison(const Operation& operation,
return ::ConvertComparison_1_2<hal_1_3::HalPolicy>(operation, model, data, comparisonOperation);
}
-
bool HalPolicy::ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertConcatenation()");
@@ -234,10 +257,13 @@ bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model
return ::ConvertDequantize_1_2<hal_1_3::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data)
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ BinaryOperation binaryOperation)
{
- ALOGV("hal_1_3::HalPolicy::ConvertDiv()");
- return ::ConvertDiv<hal_1_3::HalPolicy>(operation, model, data);
+ ALOGV("hal_1_3::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_3::HalPolicy>(operation, model, data, binaryOperation);
}
bool HalPolicy::ConvertElementwiseUnary(const Operation& operation,
@@ -359,30 +385,12 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model,
return ConvertPooling2d<hal_1_3::HalPolicy>(operation, __func__, PoolingAlgorithm::Max, model, data);
}
-bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertMaximum()");
- return ::ConvertMaximum<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertMean()");
return ::ConvertMean<hal_1_3::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertMinimum()");
- return ::ConvertMinimum<hal_1_3::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertMul()");
- return ::ConvertMul<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertPad()");
@@ -485,12 +493,6 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
return ::ConvertSoftmax<hal_1_3::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertSub()");
- return ::ConvertSub<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertTanH()");
@@ -503,6 +505,18 @@ bool HalPolicy::ConvertTransposeConv2d(const Operation& operation, const Model&
return ::ConvertTransposeConv2d<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertSplit(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertSplit()");
+ return ::ConvertSplit<hal_1_3::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertTile(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertTile()");
+ return ::ConvertTile<hal_1_3::HalPolicy>(operation, model, data);
+}
+
bool HalPolicy::ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertSqrt()");
@@ -530,5 +544,11 @@ bool HalPolicy::ConvertTranspose(const Operation& operation, const Model& model,
return ::ConvertTranspose<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertUnidirectionalSequenceLstm(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertUnidirectionalSequenceLstm()");
+ return ::ConvertUnidirectionalSequenceLstm<hal_1_3::HalPolicy>(operation, model, data);
+}
+
} // namespace hal_1_3
} // namespace armnn_driver
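The change above removes the separate ConvertAdd/ConvertDiv/ConvertMaximum/ConvertMinimum/ConvertMul/ConvertSub wrappers and routes all of them (plus POW) through one ConvertElementwiseBinary() entry point that takes an armnn::BinaryOperation. A small hedged sketch of that dispatch shape follows, with placeholder enums instead of the real Operation/Model/ConversionData signatures.

    // Dispatch-shape sketch only: the enum names follow the cases added to
    // ConvertOperation() above; the real converters take the HAL Operation,
    // Model and ConversionData arguments omitted here for brevity.
    #include <map>

    enum class BinaryOperation { Add, Div, Maximum, Minimum, Mul, Power, Sub };
    enum class OperationType   { ADD, DIV, MAXIMUM, MINIMUM, MUL, POW, SUB };

    // One parameterised handler replaces seven near-identical wrappers.
    bool ConvertElementwiseBinary(BinaryOperation /*binaryOperation*/)
    {
        // ... add a single ElementwiseBinary layer configured by the operation ...
        return true;
    }

    bool ConvertOperation(OperationType type)
    {
        static const std::map<OperationType, BinaryOperation> binaryOps =
        {
            { OperationType::ADD,     BinaryOperation::Add     },
            { OperationType::DIV,     BinaryOperation::Div     },
            { OperationType::MAXIMUM, BinaryOperation::Maximum },
            { OperationType::MINIMUM, BinaryOperation::Minimum },
            { OperationType::MUL,     BinaryOperation::Mul     },
            { OperationType::POW,     BinaryOperation::Power   },
            { OperationType::SUB,     BinaryOperation::Sub     },
        };
        const auto it = binaryOps.find(type);
        return it != binaryOps.end() && ConvertElementwiseBinary(it->second);
    }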
diff --git a/1.3/HalPolicy.hpp b/1.3/HalPolicy.hpp
index 6dfb8856..c876da19 100644
--- a/1.3/HalPolicy.hpp
+++ b/1.3/HalPolicy.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -36,8 +36,6 @@ public:
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertArgMinMax(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -47,6 +45,10 @@ private:
static bool ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertCast(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data);
+
static bool ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -62,7 +64,10 @@ private:
static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
static bool ConvertElementwiseUnary(const Operation& operation,
const Model& model,
@@ -108,14 +113,8 @@ private:
static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data);
-
- static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data);
@@ -154,19 +153,25 @@ private:
static bool ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertSplit(const Operation& operation, const Model& model, ConversionData& data);
+
static bool ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertSqueeze(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertTanH(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertTile(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertUnidirectionalSequenceLstm(const Operation& operation,
+ const Model& model,
+ ConversionData& data);
};
} // namespace hal_1_3
diff --git a/Android.bp b/Android.bp
index fde01e83..f7cdadcf 100644
--- a/Android.bp
+++ b/Android.bp
@@ -5,64 +5,16 @@
////////////////////////////////////////////
// //
-// static boost libs //
-// //
-////////////////////////////////////////////
-cc_defaults {
- name: "libboost-defaults",
- proprietary: true,
- export_include_dirs: ["boost_1_64_0"],
- cflags: [
- "-O3",
- "-fexceptions",
- "-Wno-unused-parameter",
- "-DBOOST_NO_AUTO_PTR"
- ],
- cppflags: [
- "-std=c++14"
- ],
- rtti: true,
-}
-
-cc_library_static {
- name: "libboost_unit_test_framework",
- defaults: ["libboost-defaults"],
- srcs: [
- "boost_1_64_0/libs/test/src/compiler_log_formatter.cpp",
- "boost_1_64_0/libs/test/src/framework.cpp",
- "boost_1_64_0/libs/test/src/results_reporter.cpp",
- "boost_1_64_0/libs/test/src/unit_test_main.cpp",
- "boost_1_64_0/libs/test/src/cpp_main.cpp",
- "boost_1_64_0/libs/test/src/junit_log_formatter.cpp",
- "boost_1_64_0/libs/test/src/test_main.cpp",
- "boost_1_64_0/libs/test/src/unit_test_monitor.cpp",
- "boost_1_64_0/libs/test/src/debug.cpp",
- "boost_1_64_0/libs/test/src/plain_report_formatter.cpp",
- "boost_1_64_0/libs/test/src/test_tools.cpp",
- "boost_1_64_0/libs/test/src/unit_test_parameters.cpp",
- "boost_1_64_0/libs/test/src/decorator.cpp",
- "boost_1_64_0/libs/test/src/progress_monitor.cpp",
- "boost_1_64_0/libs/test/src/test_tree.cpp",
- "boost_1_64_0/libs/test/src/xml_log_formatter.cpp",
- "boost_1_64_0/libs/test/src/execution_monitor.cpp",
- "boost_1_64_0/libs/test/src/results_collector.cpp",
- "boost_1_64_0/libs/test/src/unit_test_log.cpp",
- "boost_1_64_0/libs/test/src/xml_report_formatter.cpp",
- ],
-}
-
-////////////////////////////////////////////
-// //
// flatbuffers libs //
// //
////////////////////////////////////////////
cc_defaults {
name: "libflatbuffers-defaults",
proprietary: true,
- export_include_dirs: [ "flatbuffers-1.12.0/",
- "flatbuffers-1.12.0/include",
+ export_include_dirs: [ "flatbuffers/",
+ "flatbuffers/include",
],
- local_include_dirs: ["flatbuffers-1.12.0/include",],
+ local_include_dirs: ["flatbuffers/include",],
cflags: [
"-O3",
"-fexceptions",
@@ -79,8 +31,8 @@ cc_library_static {
name: "libflatbuffers-framework",
defaults: ["libflatbuffers-defaults"],
srcs: [
- "flatbuffers-1.12.0/src/flatc.cpp",
- "flatbuffers-1.12.0/src/flatc_main.cpp",
+ "flatbuffers/src/flatc.cpp",
+ "flatbuffers/src/flatc_main.cpp",
],
}
diff --git a/Android.mk b/Android.mk
index 06b95a1a..d6dbf0ac 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 ARM Ltd. All rights reserved.
+# Copyright © 2022-2023 ARM Ltd. and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -43,23 +43,15 @@ R_OR_LATER := 1
ANDROID_R := 1
endif # PLATFORM_VERSION == 11
-ifeq ($(PLATFORM_VERSION),S)
+ifeq ($(PLATFORM_VERSION),$(filter $(PLATFORM_VERSION),12 13 14 S T U))
P_OR_LATER := 1
Q_OR_LATER := 1
R_OR_LATER := 1
S_OR_LATER := 1
ANDROID_R := 0
ANDROID_S := 1
-endif # PLATFORM_VERSION == S
+endif # PLATFORM_VERSION >= S
-ifeq ($(PLATFORM_VERSION),12)
-P_OR_LATER := 1
-Q_OR_LATER := 1
-R_OR_LATER := 1
-S_OR_LATER := 1
-ANDROID_R := 0
-ANDROID_S := 1
-endif # PLATFORM_VERSION == 12
CPP_VERSION := c++14
@@ -68,10 +60,16 @@ CPP_VERSION := c++17
endif
# Configure these paths if you move the source or Khronos headers
+ARMNN_GENERATED_HEADER_PATH := $(LOCAL_PATH)/armnn/generated
+ARMNN_PROFILING_HEADER_PATH := $(LOCAL_PATH)/armnn/profiling
+ARMNN_COMMON_PROFILING_HEADER_PATH := $(LOCAL_PATH)/armnn/profiling/common/include
+ARMNN_CLIENT_PROFILING_HEADER_PATH := $(LOCAL_PATH)/armnn/profiling/client/include
ARMNN_HEADER_PATH := $(LOCAL_PATH)/armnn/include
+ARMNN_BACKEND_MAKEFILE_LOCAL_PATHS := $(wildcard $(LOCAL_PATH)/armnn/src/backends/*/backend.mk)
+ARMNN_BACKEND_MAKEFILE_PATHS := $(subst $(LOCAL_PATH),,$(ARMNN_BACKEND_MAKEFILE_LOCAL_PATHS))
+ARMNN_BACKEND_MAKEFILE_DIRS := $(subst /backend.mk,,$(ARMNN_BACKEND_MAKEFILE_PATHS))
ARMNN_THIRD_PARTY_PATH := $(LOCAL_PATH)/armnn/third-party
ARMNN_UTILS_HEADER_PATH := $(LOCAL_PATH)/armnn/src/armnnUtils
-ARMNN_THIRD_PARTY_PATH := $(LOCAL_PATH)/armnn/third-party
OPENCL_HEADER_PATH := $(LOCAL_PATH)/clframework/include
NN_HEADER_PATH := $(LOCAL_PATH)/../../../frameworks/ml/nn/runtime/include
ifeq ($(S_OR_LATER),1)
@@ -83,7 +81,7 @@ endif
ARMNN_COMPUTE_CL_ENABLED := 1
ARMNN_COMPUTE_NEON_ENABLED := 1
ARMNN_REF_ENABLED := 1
-ARMNN_ETHOSN_ENABLED := 1
+ARMNN_ETHOSN_ENABLED := 0
ifeq ($(ARMNN_COMPUTE_CL_ENABLE),0)
ARMNN_COMPUTE_CL_ENABLED := 0
@@ -97,8 +95,8 @@ ifeq ($(ARMNN_REF_ENABLE),0)
ARMNN_REF_ENABLED := 0
endif
-ifeq ($(ARMNN_ETHOSN_ENABLE),0)
-ARMNN_ETHOSN_ENABLED := 0
+ifeq ($(ARMNN_ETHOSN_ENABLE),1)
+ARMNN_ETHOSN_ENABLED := 1
endif
# Variable to control inclusion of libOpenCL shared library
@@ -110,6 +108,24 @@ endif
# Variable to control retire rate of priority queue
RETIRE_RATE := 3
+# Placeholder to hold all backend link files.
+ARMNN_BACKEND_STATIC_LIBRARIES :=
+ARMNN_BACKEND_SHARED_LIBRARIES :=
+
+# Iterate over the Arm NN backend makefiles and their include paths: include
+# each backend.mk into the current makefile and append the link files held by
+# its optional BACKEND_STATIC_LIBRARIES and BACKEND_SHARED_LIBRARIES variables
+# to the ARMNN_BACKEND_STATIC_LIBRARIES and ARMNN_BACKEND_SHARED_LIBRARIES
+# lists.
+
+$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\
+ $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\
+ $(eval ARMNN_BACKEND_STATIC_LIBRARIES += $(BACKEND_STATIC_LIBRARIES)))
+
+$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\
+ $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\
+ $(eval ARMNN_BACKEND_SHARED_LIBRARIES += $(BACKEND_SHARED_LIBRARIES)))
+
#######################
# libarmnn-driver@1.0 #
#######################
@@ -128,10 +144,13 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
- $(ARMNN_THIRD_PARTY_PATH) \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH)
@@ -155,6 +174,8 @@ endif
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS+= \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=libarmnndriver10MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1)
@@ -192,7 +213,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := libarmnn
@@ -258,10 +280,13 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
- $(ARMNN_THIRD_PARTY_PATH) \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH)
@@ -269,12 +294,17 @@ LOCAL_CFLAGS := \
-std=$(CPP_VERSION) \
-fexceptions \
-Werror \
+ -Wall \
+ -Wextra \
+ -Wno-unused-function \
-Wno-format-security \
-DARMNN_ANDROID_NN_V1_1
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS+= \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=libarmnnDriver11MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
# Required to build with the changes made to the Android ML framework specific to Android R
@@ -325,7 +355,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := libarmnn
@@ -381,10 +412,13 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
- $(ARMNN_THIRD_PARTY_PATH) \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH)
@@ -392,12 +426,17 @@ LOCAL_CFLAGS := \
-std=$(CPP_VERSION) \
-fexceptions \
-Werror \
+ -Wall \
+ -Wextra \
+ -Wno-unused-function \
-Wno-format-security \
-DARMNN_ANDROID_NN_V1_2
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS+= \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=libarmnnDriver12MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
# Required to build with the changes made to the Android ML framework specific to Android R
@@ -442,6 +481,7 @@ LOCAL_SRC_FILES := \
ArmnnDriverImpl.cpp \
ArmnnPreparedModel.cpp \
ArmnnPreparedModel_1_2.cpp \
+ CacheDataHandler.cpp \
ConversionUtils.cpp \
DriverOptions.cpp \
ModelToINetworkConverter.cpp \
@@ -451,7 +491,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := libarmnn
@@ -503,10 +544,13 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
- $(ARMNN_THIRD_PARTY_PATH) \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH)
@@ -514,6 +558,9 @@ LOCAL_CFLAGS := \
-std=$(CPP_VERSION) \
-fexceptions \
-Werror \
+ -Wall \
+ -Wextra \
+ -Wno-unused-function \
-Wno-format-security \
-DARMNN_ANDROID_NN_V1_3 \
@@ -530,6 +577,8 @@ endif
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS+= \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=libarmnnDriver13MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1)
@@ -569,6 +618,7 @@ LOCAL_SRC_FILES := \
ArmnnPreparedModel.cpp \
ArmnnPreparedModel_1_2.cpp \
ArmnnPreparedModel_1_3.cpp \
+ CacheDataHandler.cpp \
ConversionUtils.cpp \
DriverOptions.cpp \
ModelToINetworkConverter.cpp \
@@ -579,7 +629,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := libarmnn
@@ -631,8 +682,12 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(NN_HEADER_PATH)
LOCAL_CFLAGS := \
@@ -642,6 +697,8 @@ LOCAL_CFLAGS := \
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS += \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=neuralNetworks10MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
# Required to build with the changes made to the Android ML framework specific to Android R
@@ -661,7 +718,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.0
@@ -678,7 +736,8 @@ LOCAL_SHARED_LIBRARIES := \
libutils \
android.hardware.neuralnetworks@1.0 \
android.hidl.allocator@1.0 \
- android.hidl.memory@1.0
+ android.hidl.memory@1.0 \
+ $(ARMNN_BACKEND_SHARED_LIBRARIES)
ifeq ($(P_OR_LATER),1)
# Required to build the 1.0 version of the NN Driver on Android P and later versions,
@@ -732,8 +791,12 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(NN_HEADER_PATH)
LOCAL_CFLAGS := \
@@ -744,6 +807,8 @@ LOCAL_CFLAGS := \
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS += \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=neuralNetworks11MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
# Required to build with the changes made to the Android ML framework specific to Android R
@@ -763,7 +828,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.1
@@ -781,7 +847,8 @@ LOCAL_SHARED_LIBRARIES := \
android.hardware.neuralnetworks@1.0 \
android.hardware.neuralnetworks@1.1 \
android.hidl.allocator@1.0 \
- android.hidl.memory@1.0
+ android.hidl.memory@1.0 \
+ $(ARMNN_BACKEND_SHARED_LIBRARIES)
ifeq ($(Q_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
@@ -825,8 +892,12 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(NN_HEADER_PATH)
LOCAL_CFLAGS := \
@@ -837,6 +908,8 @@ LOCAL_CFLAGS := \
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS += \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=neuralNetworks12MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
# Required to build with the changes made to the Android ML framework specific to Android R
@@ -856,7 +929,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.2
@@ -879,7 +953,8 @@ LOCAL_SHARED_LIBRARIES := \
android.hidl.memory@1.0 \
android.hardware.neuralnetworks@1.0 \
android.hardware.neuralnetworks@1.1 \
- android.hardware.neuralnetworks@1.2
+ android.hardware.neuralnetworks@1.2 \
+ $(ARMNN_BACKEND_SHARED_LIBRARIES)
ifeq ($(R_OR_LATER),1)
LOCAL_SHARED_LIBRARIES+= \
@@ -914,8 +989,12 @@ LOCAL_PROPRIETARY_MODULE := true
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_C_INCLUDES := \
- $(ARMNN_HEADER_PATH) \
+ $(ARMNN_GENERATED_HEADER_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
+ $(ARMNN_PROFILING_HEADER_PATH) \
+ $(ARMNN_COMMON_PROFILING_HEADER_PATH) \
+ $(ARMNN_CLIENT_PROFILING_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
$(NN_HEADER_PATH)
LOCAL_CFLAGS := \
@@ -936,6 +1015,8 @@ endif
ifeq ($(ARMNN_DRIVER_DEBUG),1)
LOCAL_CFLAGS += \
-UNDEBUG
+LOCAL_LDFLAGS += \
+ -Wl,-Map=neuralNetworks13MapFile.map
endif # ARMNN_DRIVER_DEBUG == 1
LOCAL_SRC_FILES := \
@@ -944,7 +1025,8 @@ LOCAL_SRC_FILES := \
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.3
@@ -969,7 +1051,8 @@ LOCAL_SHARED_LIBRARIES := \
android.hardware.neuralnetworks@1.0 \
android.hardware.neuralnetworks@1.1 \
android.hardware.neuralnetworks@1.2 \
- android.hardware.neuralnetworks@1.3
+ android.hardware.neuralnetworks@1.3 \
+ $(ARMNN_BACKEND_SHARED_LIBRARIES)
ifeq ($(ARMNN_INCLUDE_LIBOPENCL),1)
LOCAL_SHARED_LIBRARIES+= \
diff --git a/ArmnnDevice.cpp b/ArmnnDevice.cpp
index ffb07c8d..f2a198d5 100644
--- a/ArmnnDevice.cpp
+++ b/ArmnnDevice.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -98,7 +98,17 @@ ArmnnDevice::ArmnnDevice(DriverOptions options)
}
else
{
- backends.push_back(backend);
+ if (m_Options.isAsyncModelExecutionEnabled() &&
+ armnn::HasMatchingCapability(armnn::BackendOptions::BackendOption{"AsyncExecution", false},
+ backend))
+ {
+ ALOGV("ArmnnDevice: ArmNN does not support AsyncExecution with the following backend: %s",
+ backend.Get().c_str());
+ }
+ else
+ {
+ backends.push_back(backend);
+ }
}
}
}
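When asynchronous model execution is requested, ArmnnDevice now skips any backend that matches the capability {"AsyncExecution", false} instead of handing it to the runtime. A minimal sketch of that filter, assuming the same armnn::HasMatchingCapability() / BackendOptions::BackendOption API the hunk above uses:

    // Sketch of the backend filter above; it assumes the armnn::HasMatchingCapability
    // and BackendOptions::BackendOption API shown in the diff.
    #include <vector>

    #include <armnn/BackendHelper.hpp>
    #include <armnn/BackendId.hpp>
    #include <armnn/BackendOptions.hpp>

    std::vector<armnn::BackendId> FilterBackendsForAsyncExecution(
        const std::vector<armnn::BackendId>& requested,
        bool asyncExecutionEnabled)
    {
        std::vector<armnn::BackendId> usable;
        for (const auto& backend : requested)
        {
            // A backend matching the capability {"AsyncExecution", false} cannot run
            // asynchronous executions, so drop it when async mode is enabled.
            const bool lacksAsync = armnn::HasMatchingCapability(
                armnn::BackendOptions::BackendOption{"AsyncExecution", false}, backend);
            if (asyncExecutionEnabled && lacksAsync)
            {
                continue; // the driver logs and skips such backends
            }
            usable.push_back(backend);
        }
        return usable;
    }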
diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp
index 3e4aab3c..dd60cc7b 100644
--- a/ArmnnDriverImpl.cpp
+++ b/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -23,6 +23,7 @@
#include <ValidateHal.h>
#include <log/log.h>
+#include <chrono>
using namespace std;
using namespace android;
@@ -70,6 +71,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
{
ALOGV("ArmnnDriverImpl::prepareModel()");
+ std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
if (cb.get() == nullptr)
{
ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -102,13 +105,17 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
// Serialize the network graph to a .armnn file if an output directory
// has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
auto serializedNetworkFileName =
- SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ false);
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
- armnn::OptimizerOptions OptOptions;
- OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
armnn::BackendOptions gpuAcc("GpuAcc",
{
@@ -124,8 +131,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
{ "FastMathEnabled", options.IsFastMathEnabled() },
{ "NumberOfThreads", options.GetNumberOfThreads() }
});
- OptOptions.m_ModelOptions.push_back(gpuAcc);
- OptOptions.m_ModelOptions.push_back(cpuAcc);
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
@@ -163,9 +170,14 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
// Load it into the runtime.
armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ armnn::MemorySource::Undefined,
+ armnn::MemorySource::Undefined);
+
try
{
- if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
}
@@ -191,32 +203,43 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
runtime.get(),
model,
options.GetRequestInputsAndOutputsDumpDir(),
- options.IsGpuProfilingEnabled()));
-
- // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
- // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled()));
+
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
- }
-
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
- {
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+ // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+ if (!preparedModel->ExecuteWithDummyInputs())
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
}
- catch (std::exception& error)
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
}
}
-
NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel);
+ ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
return V1_0::ErrorStatus::NONE;
}
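
The ArmnnDriverImpl hunks above migrate from the public armnn::OptimizerOptions members to the OptimizerOptionsOpaque setters and time prepareModel with std::chrono. A condensed sketch of that pattern, assuming an Arm NN build; the backend option values here are illustrative, not the driver's defaults:

#include <armnn/ArmNN.hpp>
#include <chrono>

void ConfigureOptimizerAndTimePrepare(bool float32ToFloat16)
{
    auto prepareStart = std::chrono::system_clock::now();

    // Optimizer options are now set through the opaque wrapper rather than public members.
    armnn::OptimizerOptionsOpaque optOptions;
    optOptions.SetReduceFp32ToFp16(float32ToFloat16);

    armnn::BackendOptions gpuAcc("GpuAcc", {{ "FastMathEnabled", true }});
    armnn::BackendOptions cpuAcc("CpuAcc", {{ "FastMathEnabled", true }, { "NumberOfThreads", 4u }});
    optOptions.AddModelOption(gpuAcc);
    optOptions.AddModelOption(cpuAcc);

    // ... armnn::Optimize(...) and runtime->LoadNetwork(...) would run here ...

    auto prepareMicros = std::chrono::duration_cast<std::chrono::microseconds>(
                             std::chrono::system_clock::now() - prepareStart).count();
    (void)prepareMicros; // logged via ALOGV in the driver
}
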
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 60beac4f..d87f9f82 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,11 +8,13 @@
#include "ArmnnPreparedModel.hpp"
#include "Utils.hpp"
+#include <armnn/Types.hpp>
+
#include <log/log.h>
#include <OperationsUtils.h>
#include <ValidateHal.h>
-#include <cassert>
+#include <chrono>
#include <cinttypes>
#ifdef ARMNN_ANDROID_S
@@ -90,6 +92,9 @@ RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
ArmnnPreparedModel<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template <typename TensorBindingCollection>
void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -112,16 +117,43 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled)
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
, m_RequestCount(0)
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
}
template<typename HalVersion>
@@ -129,12 +161,21 @@ ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
{
// Get a hold of the profiler used by this model.
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+ if (profiler && m_GpuProfilingEnabled)
+ {
+ // Dump the profiling info to a file if required.
+ DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
+ profiler.get());
+ }
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
- // Dump the profiling info to a file if required.
- DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
}
template<typename HalVersion>
@@ -177,6 +218,7 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
+
// add the inputs and outputs with their data
try
{
@@ -184,8 +226,19 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
for (unsigned int i = 0; i < request.inputs.size(); i++)
{
const auto& inputArg = request.inputs[i];
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ inputTensorInfo,
+ inputArg,
+ "input");
+ if (result != V1_0::ErrorStatus::NONE)
+ {
+ return result;
+ }
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
if (inputTensor.GetMemoryArea() == nullptr)
{
@@ -200,8 +253,17 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
-
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ outputTensorInfo,
+ outputArg,
+ "output");
+
+ if (result != V1_0::ErrorStatus::NONE)
+ {
+ return result;
+ }
+
const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
if (outputTensor.GetMemoryArea() == nullptr)
{
@@ -225,8 +287,6 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
- ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
-
auto cb = [callback](V1_0::ErrorStatus errorStatus, std::string callingFunction)
{
NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
@@ -234,7 +294,17 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
CallbackContext_1_0 armnnCb;
armnnCb.callback = cb;
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGV("ArmnnPreparedModel::execute(...) before ScheduleGraphForExecution");
+ ScheduleGraphForExecution(pMemPools, pInputTensors, pOutputTensors, armnnCb);
+ ALOGV("ArmnnPreparedModel::execute(...) after ScheduleGraphForExecution");
+ return V1_0::ErrorStatus::NONE;
+ }
+
// post the request for asynchronous execution
+ ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
return V1_0::ErrorStatus::NONE; // successfully queued
@@ -248,13 +318,37 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
CallbackContext_1_0 cb)
{
ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
// run it
try
{
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (status != armnn::Status::Success)
{
ALOGW("EnqueueWorkload failed");
@@ -291,6 +385,12 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
#endif
}
+ // Log the total time in this call. This is a good number to compare to that printed out by
+ // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
+
cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
}
@@ -301,7 +401,11 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
armnn::InputTensors inputTensors;
for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
{
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+
storage.emplace_back(inputTensorInfo.GetNumBytes());
const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
@@ -320,7 +424,29 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
try
{
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (status != armnn::Status::Success)
{
ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
@@ -340,11 +466,73 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
return true;
}
+/// Schedule the graph prepared from the request for execution
+template<typename HalVersion>
+template<typename CallbackContext>
+void ArmnnPreparedModel<HalVersion>::ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext)
+{
+ ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution(...)");
+
+ DumpTensorsIfRequired("Input", *inputTensors);
+
+
+ auto tpCb = std::make_shared<
+ ArmnnThreadPoolCallback<CallbackContext_1_0>>(this,
+ pMemPools,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ armnn::QosExecPriority::Medium,
+ tpCb);
+ ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution end");
+}
+
+template<typename HalVersion>
+template <typename CallbackContext>
+void ArmnnPreparedModel<HalVersion>::ArmnnThreadPoolCallback<CallbackContext>::Notify(
+ armnn::Status status, armnn::InferenceTimingPair timeTaken)
+{
+ armnn::IgnoreUnused(status, timeTaken);
+ ALOGV("ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
+
+ m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
+
+ // Commit output buffers.
+ // Note that we update *all* pools, even if they aren't actually used as outputs -
+ // this is simpler and is what the CpuExecutor does.
+ for (android::nn::RunTimePoolInfo& pool : *m_MemPools)
+ {
+ // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
+ // update() has been removed and flush() added.
+ #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
+ pool.flush();
+ #else
+ pool.update();
+ #endif
+ }
+
+ m_CallbackContext.callback(V1_0::ErrorStatus::NONE, "ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
+ return;
+}
+
///
/// Class template specializations
///
template class ArmnnPreparedModel<hal_1_0::HalPolicy>;
+template void ArmnnPreparedModel<hal_1_0::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_0>(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext_1_0 callbackContext);
#ifdef ARMNN_ANDROID_NN_V1_1
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
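
The constructor changes above share a single armnn::Threadpool across prepared models: each instance contributes one working memory handle per thread and keeps the last handle for its own Execute() calls. A minimal sketch of that setup, assuming an Arm NN build and a network already loaded into the runtime:

#include <armnn/IRuntime.hpp>
#include <armnn/Threadpool.hpp>
#include <memory>
#include <vector>

std::unique_ptr<armnn::Threadpool> CreateSharedThreadpool(armnn::IRuntime* runtime,
                                                          armnn::NetworkId networkId,
                                                          unsigned int numberOfThreads,
                                                          std::shared_ptr<armnn::IWorkingMemHandle>& workingMemHandle)
{
    // One working memory handle per worker thread; the threadpool takes shared ownership.
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    for (unsigned int i = 0; i < numberOfThreads; ++i)
    {
        memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
    }

    auto threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);

    // The driver keeps the last handle for the synchronous Execute() path.
    workingMemHandle = memHandles.back();
    return threadpool;
}

When a threadpool already exists, later prepared models only register their handles with LoadMemHandles(), as in the hunks above, and unregister them with UnloadMemHandles() in the destructor.
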
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index 89f6226f..b9d89d4b 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -11,6 +11,7 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
#include <string>
#include <vector>
@@ -38,7 +39,11 @@ public:
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled);
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false);
virtual ~ArmnnPreparedModel();
@@ -56,18 +61,60 @@ public:
bool ExecuteWithDummyInputs();
private:
+
+ template<typename CallbackContext>
+ class ArmnnThreadPoolCallback : public armnn::IAsyncExecutionCallback
+ {
+ public:
+ ArmnnThreadPoolCallback(ArmnnPreparedModel<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext) :
+ m_Model(model),
+ m_MemPools(pMemPools),
+ m_InputTensors(inputTensors),
+ m_OutputTensors(outputTensors),
+ m_CallbackContext(callbackContext)
+ {}
+
+ void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
+
+ ArmnnPreparedModel<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- HalModel m_Model;
+ /// schedule the graph prepared from the request for execution
+ template<typename CallbackContext>
+ void ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext m_CallbackContext);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ HalModel m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
+ static RequestThread<ArmnnPreparedModel,
+ HalVersion,
+ CallbackContext_1_0> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
+ const bool m_EnableImport;
+ const bool m_EnableExport;
};
}
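
The ArmnnThreadPoolCallback declared above is the hook the threadpool invokes once an inference finishes. A stripped-down sketch of the shape it takes, assuming an Arm NN build (the include that declares the callback interface is assumed to come in via Threadpool.hpp) and with the HAL-specific pool commit reduced to a comment:

#include <armnn/Threadpool.hpp>
#include <armnn/Types.hpp>
#include <memory>

class MinimalCallback : public armnn::IAsyncExecutionCallback
{
public:
    void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override
    {
        (void)timeTaken;
        // Commit output memory pools and forward status to the NNAPI callback here;
        // a non-Success status maps to ErrorStatus::GENERAL_FAILURE in the driver.
        (void)status;
    }
};

// Usage, as in ScheduleGraphForExecution: the callback is handed to the threadpool together
// with the tensors it will eventually report on.
// threadpool->Schedule(networkId, inputTensors, outputTensors,
//                      armnn::QosExecPriority::Medium, std::make_shared<MinimalCallback>());
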
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index a2148c29..a401b30e 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -1,19 +1,22 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#define LOG_TAG "ArmnnDriver"
#include "ArmnnPreparedModel_1_2.hpp"
+
#include "Utils.hpp"
+#include <armnn/Types.hpp>
+
#include <log/log.h>
#include <OperationsUtils.h>
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>
-#include <cassert>
+#include <chrono>
#include <cinttypes>
#ifdef ARMNN_ANDROID_S
@@ -124,6 +127,9 @@ RequestThread<ArmnnPreparedModel_1_2, HalVersion, CallbackContext_1_2>
ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_2<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_2<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -146,16 +152,88 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
armnn::IRuntime* runtime,
const V1_2::Model& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled)
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
, m_RequestCount(0)
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(false)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled,
+ const bool preparedFromCache)
+ : m_NetworkId(networkId)
+ , m_Runtime(runtime)
+ , m_RequestCount(0)
+ , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+ , m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(preparedFromCache)
+{
+ // Enable profiling if required.
+ m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
}
template<typename HalVersion>
@@ -163,12 +241,21 @@ ArmnnPreparedModel_1_2<HalVersion>::~ArmnnPreparedModel_1_2()
{
// Get a hold of the profiler used by this model.
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+ if (profiler && m_GpuProfilingEnabled)
+ {
+ // Dump the profiling info to a file if required.
+ DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
+ profiler.get());
+ }
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
- // Dump the profiling info to a file if required.
- DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
}
template<typename HalVersion>
@@ -225,8 +312,20 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForIn
for (unsigned int i = 0; i < request.inputs.size(); i++)
{
const auto& inputArg = request.inputs[i];
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // inputs (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ inputTensorInfo,
+ inputArg,
+ "input");
+
+ if (result != V1_0::ErrorStatus::NONE)
+ {
+ return result;
+ }
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
if (inputTensor.GetMemoryArea() == nullptr)
@@ -252,8 +351,17 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForOu
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
+ armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ outputTensorInfo,
+ outputArg,
+ "output");
+
+ if (result != V1_0::ErrorStatus::NONE)
+ {
+ return result;
+ }
- const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
if (outputTensor.GetMemoryArea() == nullptr)
{
@@ -277,7 +385,7 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForOu
return V1_0::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
}
#else
- const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getMemory().size;
+ const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
if (bufferSize < outputSize)
{
ALOGW("ArmnnPreparedModel_1_2::Execute failed bufferSize (%s) < outputSize (%s)",
@@ -352,7 +460,10 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const V1_0
V1_2::MeasureTiming measureTiming,
executeSynchronously_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -368,7 +479,7 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const V1_0
driverStart = Now();
}
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
ALOGE("ArmnnPreparedModel_1_2::executeSynchronously invalid request model");
cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming);
@@ -419,6 +530,8 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)");
TimePoint driverEnd, deviceStart, deviceEnd;
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -440,7 +553,30 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
deviceStart = Now();
}
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel_1_2::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel_1_2::ExecuteGraph m_AsyncModelExecutionEnabled false");
+
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (cb.ctx.measureTimings == V1_2::MeasureTiming::YES)
{
@@ -477,24 +613,33 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
V1_2::Timing timing;
timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart);
timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.ctx.driverStart);
- ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu", timing.timeOnDevice,
- timing.timeInDriver);
+ ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
cb.callback(V1_0::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_2::ExecuteGraph");
} else {
cb.callback(V1_0::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph");
}
+ // Log the total time in this call. This is a good number to compare to that printed out by
+ // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel_1_2::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
return true;
}
template<typename HalVersion>
-bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
+bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
{
std::vector<std::vector<char>> storage;
armnn::InputTensors inputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numInputs; i++)
{
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+
storage.emplace_back(inputTensorInfo.GetNumBytes());
const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
@@ -502,7 +647,7 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
}
armnn::OutputTensors outputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numOutputs; i++)
{
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -534,10 +679,13 @@ Return <V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const V1_
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
callback(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return V1_0::ErrorStatus::INVALID_ARGUMENT;
@@ -567,12 +715,21 @@ Return <V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const V1_
{}
}
- ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg");
// post the request for asynchronous execution
CallbackContext_1_2 cb;
cb.callback = callback;
cb.ctx = ctx;
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::execute(...) before ScheduleGraphForExecution");
+ ScheduleGraphForExecution(memPools, inputTensors, outputTensors, cb);
+ ALOGV("ArmnnPreparedModel_1_2::execute(...) after ScheduleGraphForExecution");
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg");
m_RequestThread.PostMsg(this, memPools, inputTensors, outputTensors, cb);
ALOGV("ArmnnPreparedModel_1_2::execute(...) after PostMsg");
return V1_0::ErrorStatus::NONE;
@@ -602,6 +759,84 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst(
return Void();
}
+/// Schedule the graph prepared from the request for execution
+template<typename HalVersion>
+template<typename CallbackContext>
+void ArmnnPreparedModel_1_2<HalVersion>::ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext)
+{
+ ALOGV("ArmnnPreparedModel_1_2::ScheduleGraphForExecution(...)");
+
+ DumpTensorsIfRequired("Input", *inputTensors);
+
+ unsigned int outputTensorSize = outputTensors.get()->size();
+ std::vector<V1_2::OutputShape> outputShapes(outputTensorSize);
+ for (unsigned int i = 0; i < outputTensorSize; i++)
+ {
+ std::pair<int, armnn::Tensor> outputTensorPair = outputTensors.get()->at(i);
+ const armnn::Tensor outputTensor = outputTensorPair.second;
+ const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();
+
+ outputShapes[i] = ComputeShape(outputTensorInfo);
+ }
+
+ auto tpCb = std::make_shared<
+ ArmnnThreadPoolCallback_1_2<CallbackContext_1_2>>(this,
+ pMemPools,
+ outputShapes,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ armnn::QosExecPriority::Medium,
+ tpCb);
+ ALOGV("ArmnnPreparedModel_1_2::ScheduleGraphForExecution end");
+}
+
+template<typename HalVersion>
+template <typename CallbackContext>
+void ArmnnPreparedModel_1_2<HalVersion>::ArmnnThreadPoolCallback_1_2<CallbackContext>::Notify(
+ armnn::Status status, armnn::InferenceTimingPair timeTaken)
+{
+ ALOGV("ArmnnPreparedModel_1_2::ArmnnThreadPoolCallback_1_2 Notify");
+
+ TimePoint driverEnd;
+
+ CommitPools(*m_MemPools);
+
+ m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
+
+ if (status != armnn::Status::Success)
+ {
+ ALOGW("ArmnnThreadPoolCallback::Notify EnqueueWorkload failed");
+ m_CallbackContext.callback(
+ V1_0::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel::ExecuteGraph");
+ return;
+ }
+
+ if (m_CallbackContext.ctx.measureTimings == V1_2::MeasureTiming::YES)
+ {
+ driverEnd = std::chrono::steady_clock::now();
+ V1_2::Timing timing;
+ timing.timeOnDevice = MicrosecondsDuration(timeTaken.second, timeTaken.first);
+ timing.timeInDriver = MicrosecondsDuration(driverEnd, m_CallbackContext.ctx.driverStart);
+ ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
+ m_CallbackContext.callback(
+ V1_0::ErrorStatus::NONE, m_OutputShapes, timing, "ArmnnPreparedModel_1_2::ExecuteGraph");
+ } else {
+ m_CallbackContext.callback(
+ V1_0::ErrorStatus::NONE, m_OutputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph");
+ }
+ return;
+}
+
#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3)
template class ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>;
template bool ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>::ExecuteGraph<CallbackContext_1_2>(
@@ -609,6 +844,12 @@ template bool ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>::ExecuteGraph<CallbackC
armnn::InputTensors& pInputTensors,
armnn::OutputTensors& pOutputTensors,
CallbackContext_1_2 cb);
+
+template void ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_2>(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext_1_2 callbackContext);
#endif
} // namespace armnn_driver
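
The ExecuteGraph changes above add an optional zero-copy path: when import/export is enabled, the request buffers are registered with the runtime as Malloc memory and their ids are passed to EnqueueWorkload, while an empty id vector keeps the old copy behaviour. A condensed sketch, assuming an Arm NN build; runtime, netId and the tensor collections stand in for the driver's state:

#include <armnn/IRuntime.hpp>
#include <vector>

armnn::Status RunWithOptionalImport(armnn::IRuntime* runtime,
                                    armnn::NetworkId netId,
                                    const armnn::InputTensors& inputTensors,
                                    const armnn::OutputTensors& outputTensors,
                                    bool enableImport,
                                    bool enableExport)
{
    // Empty vectors mean "copy all inputs/outputs", which matches the default driver behaviour.
    std::vector<armnn::ImportedInputId> importedInputIds;
    if (enableImport)
    {
        importedInputIds = runtime->ImportInputs(netId, inputTensors, armnn::MemorySource::Malloc);
    }

    std::vector<armnn::ImportedOutputId> importedOutputIds;
    if (enableExport)
    {
        importedOutputIds = runtime->ImportOutputs(netId, outputTensors, armnn::MemorySource::Malloc);
    }

    return runtime->EnqueueWorkload(netId, inputTensors, outputTensors,
                                    importedInputIds, importedOutputIds);
}
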
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 13d7494e..57deb98c 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -12,6 +12,7 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
#include <string>
#include <vector>
@@ -44,7 +45,21 @@ public:
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled);
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false);
+
+ ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false,
+ const bool preparedFromCache = false);
virtual ~ArmnnPreparedModel_1_2();
@@ -73,9 +88,38 @@ public:
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
- bool ExecuteWithDummyInputs();
+ bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
private:
+
+ template<typename CallbackContext>
+ class ArmnnThreadPoolCallback_1_2 : public armnn::IAsyncExecutionCallback
+ {
+ public:
+ ArmnnThreadPoolCallback_1_2(ArmnnPreparedModel_1_2<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::vector<V1_2::OutputShape> outputShapes,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext) :
+ m_Model(model),
+ m_MemPools(pMemPools),
+ m_OutputShapes(outputShapes),
+ m_InputTensors(inputTensors),
+ m_OutputTensors(outputTensors),
+ m_CallbackContext(callbackContext)
+ {}
+
+ void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
+
+ ArmnnPreparedModel_1_2<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::vector<V1_2::OutputShape> m_OutputShapes;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+
Return<V1_0::ErrorStatus> Execute(const V1_0::Request& request,
V1_2::MeasureTiming measureTiming,
CallbackAsync_1_2 callback);
@@ -101,17 +145,32 @@ private:
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- V1_2::Model m_Model;
+ /// schedule the graph prepared from the request for execution
+ template<typename CallbackContext>
+ void ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext m_CallbackContext);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ V1_2::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
static RequestThread<ArmnnPreparedModel_1_2,
HalVersion,
- CallbackContext_1_2> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
+ CallbackContext_1_2> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
+ const bool m_EnableImport;
+ const bool m_EnableExport;
+ const bool m_PreparedFromCache;
};
}
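
ExecuteWithDummyInputs now takes explicit input and output counts because a model prepared from cache has no V1_2::Model to query; the tensor metadata comes from the runtime instead. A rough sketch of that warm-up, assuming an Arm NN build and with error handling trimmed:

#include <armnn/IRuntime.hpp>
#include <vector>

bool WarmUpWithZeroedInputs(armnn::IRuntime* runtime, armnn::NetworkId netId,
                            unsigned int numInputs, unsigned int numOutputs)
{
    std::vector<std::vector<char>> storage; // zero-initialised backing buffers

    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(netId, i);
        inputInfo.SetConstant(); // InputTensors hold ConstTensors
        storage.emplace_back(inputInfo.GetNumBytes());
        inputTensors.emplace_back(i, armnn::ConstTensor(inputInfo, storage.back().data()));
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < numOutputs; ++i)
    {
        const armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(netId, i);
        storage.emplace_back(outputInfo.GetNumBytes());
        outputTensors.emplace_back(i, armnn::Tensor(outputInfo, storage.back().data()));
    }

    return runtime->EnqueueWorkload(netId, inputTensors, outputTensors) == armnn::Status::Success;
}
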
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 2970e8ff..1827d900 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Note: the ArmnnFencedExecutionCallback and code snippet in the executeFenced() function
@@ -12,6 +12,8 @@
#include "ArmnnPreparedModel_1_3.hpp"
#include "Utils.hpp"
+#include <armnn/Types.hpp>
+
#include <Utils.h>
#include <android/sync.h>
#include <log/log.h>
@@ -19,7 +21,7 @@
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>
-#include <cassert>
+#include <chrono>
#include <cinttypes>
#ifdef ARMNN_ANDROID_S
@@ -145,6 +147,9 @@ RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3>
ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_3<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_3<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -168,7 +173,11 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
const V1_3::Model& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- V1_3::Priority priority)
+ V1_3::Priority priority,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
@@ -176,9 +185,79 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
, m_ModelPriority(priority)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(false)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled,
+ const bool preparedFromCache)
+ : m_NetworkId(networkId)
+ , m_Runtime(runtime)
+ , m_RequestCount(0)
+ , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+ , m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_ModelPriority(priority)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(preparedFromCache)
+{
+ // Enable profiling if required.
+ m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
}
template<typename HalVersion>
@@ -186,12 +265,21 @@ ArmnnPreparedModel_1_3<HalVersion>::~ArmnnPreparedModel_1_3()
{
// Get a hold of the profiler used by this model.
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+ if (profiler && m_GpuProfilingEnabled)
+ {
+ // Dump the profiling info to a file if required.
+ DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
+ profiler.get());
+ }
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
- // Dump the profiling info to a file if required.
- DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
}
template<typename HalVersion>
@@ -312,7 +400,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported.");
}
- if (!android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
{
ALOGV("ArmnnPreparedModel_1_3::executeFenced outputs must be specified for fenced execution ");
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
@@ -326,7 +414,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (!m_RequestInputsAndOutputsDumpDir.empty())
@@ -343,6 +434,21 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
auto fenceNativeHandle = fenceWaitFor[index].getNativeHandle();
if (!fenceNativeHandle)
{
+ ALOGE("ArmnnPreparedModel_1_3::executeFenced null native handle.");
+ cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+
+ if (fenceNativeHandle->numFds != 1)
+ {
+ ALOGE("ArmnnPreparedModel_1_3::executeFenced invalid fenceHandle numFds.");
+ cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
+ return Void();
+ }
+
+ if (fenceNativeHandle->numInts != 0)
+ {
+ ALOGE("ArmnnPreparedModel_1_3::executeFenced invalid fenceHandle numInts.");
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
return Void();
}
@@ -399,7 +505,8 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
fenceTiming.timeOnDevice = MicrosecondsDuration(ctx.deviceEnd, ctx.deviceStart);
fenceTiming.timeInDriver = MicrosecondsDuration(ctx.driverEnd, fenceExecutionStart);
ALOGV("ArmnnPreparedModel_1_3::fenceFinishExecutionTiming - Device = %lu Driver = %lu",
- fenceTiming.timeOnDevice, fenceTiming.timeInDriver);
+ static_cast<unsigned long>(fenceTiming.timeOnDevice),
+ static_cast<unsigned long>(fenceTiming.timeInDriver));
}
sp<ArmnnFencedExecutionCallback> armnnFencedExecutionCallback =
@@ -418,8 +525,20 @@ Return<V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::PrepareMemoryForIn
for (unsigned int i = 0; i < request.inputs.size(); i++)
{
const auto& inputArg = request.inputs[i];
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // inputs (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+ auto result = ValidateRequestArgument<V1_3::ErrorStatus, V1_3::Request>(request,
+ inputTensorInfo,
+ inputArg,
+ "input");
+
+ if (result != V1_3::ErrorStatus::NONE)
+ {
+ return result;
+ }
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
if (inputTensor.GetMemoryArea() == nullptr)
@@ -445,15 +564,24 @@ Return<V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::PrepareMemoryForOu
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
-
armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
+ auto result = ValidateRequestArgument<V1_3::ErrorStatus, V1_3::Request>(request,
+ outputTensorInfo,
+ outputArg,
+ "output");
+
+ if (result != V1_3::ErrorStatus::NONE)
+ {
+ return result;
+ }
+
const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
+
if (outputTensor.GetMemoryArea() == nullptr)
{
ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
-
const size_t outputSize = outputTensorInfo.GetNumBytes();
unsigned int count = 0;
@@ -485,9 +613,8 @@ Return<V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::PrepareMemoryForOu
size_t bufferSize = 0;
#if !defined(ARMNN_ANDROID_S)
bufferSize = memPools.at(outputArg.location.poolIndex).getHidlMemory().size();
- if (bufferSize < outputSize)
#else
- bufferSize = memPools.at(outputArg.location.poolIndex).getMemory().size;
+ bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
#endif
if (bufferSize < outputSize)
{
@@ -557,7 +684,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::ExecuteSynchronously(const V1_3
cbCtx.ctx.driverStart = Now();
}
- if (!android::nn::validateRequest(convertToV1_3(request), m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(convertToV1_3(request), m_Model))
{
ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -567,7 +694,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::ExecuteSynchronously(const V1_3
return Void();
}
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -604,7 +731,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeSynchronously(const V1_0
V1_2::MeasureTiming measureTiming,
executeSynchronously_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -637,7 +767,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeSynchronously_1_3(
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
executeSynchronously_1_3_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -706,6 +839,8 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
CallbackContext cb)
{
ALOGV("ArmnnPreparedModel_1_3::ExecuteGraph(...)");
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -726,8 +861,29 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
{
cb.ctx.deviceStart = Now();
}
-
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel_1_3::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel_1_3::ExecuteGraph m_AsyncModelExecutionEnabled false");
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (cb.ctx.measureTimings == V1_2::MeasureTiming::YES)
{
@@ -735,7 +891,7 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
}
if (status != armnn::Status::Success)
{
- ALOGW("EnqueueWorkload failed");
+ ALOGW("ArmnnPreparedModel_1_3::ExecuteGraph EnqueueWorkload failed");
cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
@@ -763,24 +919,74 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
V1_2::Timing timing;
timing.timeOnDevice = MicrosecondsDuration(cb.ctx.deviceEnd, cb.ctx.deviceStart);
timing.timeInDriver = MicrosecondsDuration(cb.ctx.driverEnd, cb.ctx.driverStart);
- ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %lu Driver = %lu", timing.timeOnDevice,
- timing.timeInDriver);
+ ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph");
} else
{
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
}
+ // Log the total time in this call. This is a good number to compare to that printed out by
+ // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel_1_3::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
return V1_3::ErrorStatus::NONE;
}
+/// Schedule the graph prepared from the request for execution
template<typename HalVersion>
-bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
+template<typename CallbackContext>
+void ArmnnPreparedModel_1_3<HalVersion>::ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext,
+ armnn::QosExecPriority priority)
+{
+ ALOGV("ArmnnPreparedModel_1_3::ScheduleGraphForExecution(...)");
+
+ DumpTensorsIfRequired("Input", *inputTensors);
+
+ unsigned int outputTensorSize = outputTensors.get()->size();
+ std::vector<V1_2::OutputShape> outputShapes(outputTensorSize);
+ for (unsigned int i = 0; i < outputTensorSize; i++)
+ {
+ std::pair<int, armnn::Tensor> outputTensorPair = outputTensors.get()->at(i);
+ const armnn::Tensor outputTensor = outputTensorPair.second;
+ const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();
+
+ outputShapes[i] = ComputeShape(outputTensorInfo);
+ }
+
+ auto tpCb = std::make_shared<
+ ArmnnThreadPoolCallback_1_3<CallbackContext_1_3>>(this,
+ pMemPools,
+ outputShapes,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ priority,
+ tpCb);
+ ALOGV("ArmnnPreparedModel_1_3::ScheduleGraphForExecution end");
+}
+
+template<typename HalVersion>
+bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
{
std::vector<std::vector<char>> storage;
armnn::InputTensors inputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numInputs; i++)
{
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+
storage.emplace_back(inputTensorInfo.GetNumBytes());
const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
@@ -788,7 +994,7 @@ bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
}
armnn::OutputTensors outputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numOutputs; i++)
{
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -822,10 +1028,13 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
callback(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute");
return V1_3::ErrorStatus::INVALID_ARGUMENT;
@@ -857,16 +1066,51 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
return V1_3::ErrorStatus::NONE;
case V1_3::ErrorStatus::GENERAL_FAILURE:
return V1_3::ErrorStatus::GENERAL_FAILURE;
+ case V1_3::ErrorStatus::INVALID_ARGUMENT:
+ return V1_3::ErrorStatus::INVALID_ARGUMENT;
default:
{}
}
-
- ALOGV("ArmnnPreparedModel_1_3::execute(...) before PostMsg");
-
- // post the request for asynchronous execution
CallbackContext_1_3 cb;
cb.callback = callback;
cb.ctx = ctx;
+
+
+ enum class QosExecPriority
+ {
+ Low = 0,
+ Medium = 1,
+ High = 2
+ };
+
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ armnn::QosExecPriority priority;
+
+ switch (GetModelPriority()) {
+ case V1_3::Priority::LOW:
+ priority = armnn::QosExecPriority::Low;
+ break;
+ case V1_3::Priority::MEDIUM:
+ priority = armnn::QosExecPriority::Medium;
+ break;
+ case V1_3::Priority::HIGH:
+ priority = armnn::QosExecPriority::High;
+ break;
+ default:
+ priority = armnn::QosExecPriority::Medium;
+
+ }
+
+ ALOGV("ArmnnPreparedModel_1_3::execute(...) before ScheduleGraphForExecution");
+ ScheduleGraphForExecution(memPools, inputTensors, outputTensors, cb, priority);
+ ALOGV("ArmnnPreparedModel_1_3::execute(...) after ScheduleGraphForExecution");
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ ALOGV("ArmnnPreparedModel_1_3::execute(...) before PostMsg");
+ // post the request for asynchronous execution
m_RequestThread.PostMsg(this, memPools, inputTensors, outputTensors, cb);
ALOGV("ArmnnPreparedModel_1_3::execute(...) after PostMsg");
return V1_3::ErrorStatus::NONE;
@@ -878,6 +1122,46 @@ V1_3::Priority ArmnnPreparedModel_1_3<HalVersion>::GetModelPriority()
return m_ModelPriority;
}
+template<typename HalVersion>
+template <typename CallbackContext>
+void ArmnnPreparedModel_1_3<HalVersion>::ArmnnThreadPoolCallback_1_3<CallbackContext>::Notify(
+ armnn::Status status, armnn::InferenceTimingPair timeTaken)
+{
+ ALOGV("ArmnnPreparedModel_1_3::ArmnnThreadPoolCallback_1_3<CallbackContext>::Notify");
+ CommitPools(*m_MemPools);
+
+ m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
+
+ if (status != armnn::Status::Success)
+ {
+ ALOGW("ArmnnThreadPoolCallback_1_3::Notify EnqueueWorkload failed");
+ m_CallbackContext.callback(V1_3::ErrorStatus::GENERAL_FAILURE,
+ {},
+ g_NoTiming,
+ "ArmnnPreparedModel_1_3::ArmnnThreadPoolCallback_1_3");
+ return;
+ }
+
+ if (m_CallbackContext.ctx.measureTimings == V1_2::MeasureTiming::YES)
+ {
+ m_CallbackContext.ctx.deviceStart = timeTaken.first;
+ m_CallbackContext.ctx.deviceEnd = timeTaken.second;
+ m_CallbackContext.ctx.driverEnd = std::chrono::steady_clock::now();
+ V1_2::Timing timing;
+ timing.timeOnDevice = MicrosecondsDuration(m_CallbackContext.ctx.deviceEnd, m_CallbackContext.ctx.deviceStart);
+ timing.timeInDriver = MicrosecondsDuration(m_CallbackContext.ctx.driverEnd, m_CallbackContext.ctx.driverStart);
+ ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
+ m_CallbackContext.callback(
+ V1_3::ErrorStatus::NONE, m_OutputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph");
+ } else
+ {
+ m_CallbackContext.callback(
+ V1_3::ErrorStatus::NONE, m_OutputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
+ }
+ return;
+}
+
#ifdef ARMNN_ANDROID_NN_V1_3
template class ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>;
template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ExecuteGraph<CallbackContext_1_3>(
@@ -885,6 +1169,13 @@ template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::
armnn::InputTensors& pInputTensors,
armnn::OutputTensors& pOutputTensors,
CallbackContext_1_3 cb);
+
+template void ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_3>(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext_1_3 callbackContext,
+ armnn::QosExecPriority priority);
#endif
} // namespace armnn_driver
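
The execute() hunk above maps the NNAPI model priority onto ArmNN's QoS priority before handing the request to the shared threadpool. A minimal sketch of that mapping in isolation, assuming the driver's V1_3 namespace alias; the helper name ToQosPriority is illustrative and not part of the driver:

    // Illustrative only: mirrors the switch in ArmnnPreparedModel_1_3::Execute().
    // Unknown or unset priorities fall back to Medium, as in the hunk above.
    armnn::QosExecPriority ToQosPriority(V1_3::Priority modelPriority)
    {
        switch (modelPriority)
        {
            case V1_3::Priority::LOW:  return armnn::QosExecPriority::Low;
            case V1_3::Priority::HIGH: return armnn::QosExecPriority::High;
            case V1_3::Priority::MEDIUM:
            default:                   return armnn::QosExecPriority::Medium;
        }
    }
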
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index c6cdcdc7..6c1c5c26 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -12,6 +12,8 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
+
#include <string>
#include <vector>
@@ -51,7 +53,22 @@ public:
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- V1_3::Priority priority = V1_3::Priority::MEDIUM);
+ V1_3::Priority priority = V1_3::Priority::MEDIUM,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false);
+
+ ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false,
+ const bool preparedFromCache = false);
virtual ~ArmnnPreparedModel_1_3();
@@ -104,11 +121,40 @@ public:
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
- bool ExecuteWithDummyInputs();
+ bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
V1_3::Priority GetModelPriority();
private:
+
+ template<typename CallbackContext>
+ class ArmnnThreadPoolCallback_1_3 : public armnn::IAsyncExecutionCallback
+ {
+ public:
+ ArmnnThreadPoolCallback_1_3(ArmnnPreparedModel_1_3<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::vector<V1_2::OutputShape> outputShapes,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext) :
+ m_Model(model),
+ m_MemPools(pMemPools),
+ m_OutputShapes(outputShapes),
+ m_InputTensors(inputTensors),
+ m_OutputTensors(outputTensors),
+ m_CallbackContext(callbackContext)
+ {}
+
+ void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
+
+ ArmnnPreparedModel_1_3<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::vector<V1_2::OutputShape> m_OutputShapes;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+
Return <V1_3::ErrorStatus> Execute(const V1_3::Request& request,
V1_2::MeasureTiming measureTiming,
CallbackAsync_1_3 callback);
@@ -133,16 +179,35 @@ private:
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- V1_3::Model m_Model;
+ /// Schedule the graph prepared from the request for execution
+ template<typename CallbackContext>
+ void ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext,
+ armnn::QosExecPriority priority);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ V1_3::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
- V1_3::Priority m_ModelPriority;
+ static RequestThread_1_3<ArmnnPreparedModel_1_3,
+ HalVersion,
+ CallbackContext_1_3> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ V1_3::Priority m_ModelPriority;
+
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
+ const bool m_EnableImport;
+ const bool m_EnableExport;
+ const bool m_PreparedFromCache;
};
}
diff --git a/CacheDataHandler.cpp b/CacheDataHandler.cpp
new file mode 100644
index 00000000..5f3a3076
--- /dev/null
+++ b/CacheDataHandler.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CacheDataHandler.hpp"
+
+#include <log/log.h>
+
+namespace armnn_driver
+{
+
+CacheDataHandler& CacheDataHandlerInstance()
+{
+ static CacheDataHandler instance;
+ return instance;
+}
+
+void CacheDataHandler::Register(const HidlToken token, const size_t hashValue, const size_t cacheSize)
+{
+ if (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+ && m_CacheDataMap.at(hashValue).GetToken() == token
+ && m_CacheDataMap.at(hashValue).GetCacheSize() == cacheSize)
+ {
+ ALOGV("CacheHandler::Register() Hash value has already registered.");
+ return;
+ }
+ CacheHandle cacheHandle(token, cacheSize);
+ m_CacheDataMap.insert({hashValue, cacheHandle});
+}
+
+bool CacheDataHandler::Validate(const HidlToken token, const size_t hashValue, const size_t cacheSize) const
+{
+ return (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+ && m_CacheDataMap.at(hashValue).GetToken() == token
+ && m_CacheDataMap.at(hashValue).GetCacheSize() == cacheSize);
+}
+
+size_t CacheDataHandler::Hash(std::vector<uint8_t>& cacheData)
+{
+ std::size_t hash = cacheData.size();
+ for (auto& i : cacheData)
+ {
+ hash = ((hash << 5) - hash) + i;
+ }
+ return hash;
+}
+
+size_t CacheDataHandler::GetCacheSize(HidlToken token)
+{
+ for (auto i = m_CacheDataMap.begin(); i != m_CacheDataMap.end(); ++i)
+ {
+ if (i->second.GetToken() == token)
+ {
+ return i->second.GetCacheSize();
+ }
+ }
+ return 0;
+}
+
+void CacheDataHandler::Clear()
+{
+ m_CacheDataMap.clear();
+}
+
+} // armnn_driver
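
Hash() above is a simple 31-multiplier rolling hash over the serialized cache bytes, seeded with the byte count, and Register()/Validate() pair a cache token with that hash and the blob size. A usage sketch, where ReadCacheBlob, cacheFd and token are illustrative placeholders rather than driver APIs:

    // Illustrative only: how a save/load cache path might use the handler.
    std::vector<uint8_t> cacheData = ReadCacheBlob(cacheFd);      // hypothetical helper
    auto& handler = armnn_driver::CacheDataHandlerInstance();
    const size_t hashValue = handler.Hash(cacheData);

    // When the model cache is written out, remember the token/hash/size triple...
    handler.Register(token, hashValue, cacheData.size());

    // ...and when preparing from cache, accept the blob only if all three still match.
    const bool cacheHit = handler.Validate(token, hashValue, cacheData.size());
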
diff --git a/CacheDataHandler.hpp b/CacheDataHandler.hpp
new file mode 100644
index 00000000..5b1b2951
--- /dev/null
+++ b/CacheDataHandler.hpp
@@ -0,0 +1,68 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <HalInterfaces.h>
+
+#include <vector>
+#include <unordered_map>
+
+#include <NeuralNetworks.h>
+
+namespace armnn_driver
+{
+
+using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+class CacheHandle
+{
+public:
+ CacheHandle(const HidlToken token, const size_t cacheSize)
+ : m_HidlToken(token), m_CacheSize(cacheSize) {}
+
+ ~CacheHandle() {};
+
+ HidlToken GetToken() const
+ {
+ return m_HidlToken;
+ }
+
+ size_t GetCacheSize() const
+ {
+ return m_CacheSize;
+ }
+
+private:
+ const HidlToken m_HidlToken;
+ const size_t m_CacheSize;
+};
+
+class CacheDataHandler
+{
+public:
+ CacheDataHandler() {}
+ ~CacheDataHandler() {}
+
+ void Register(const HidlToken token, const size_t hashValue, const size_t cacheSize);
+
+ bool Validate(const HidlToken token, const size_t hashValue, const size_t cacheSize) const;
+
+ size_t Hash(std::vector<uint8_t>& cacheData);
+
+ size_t GetCacheSize(HidlToken token);
+
+ void Clear();
+
+private:
+ CacheDataHandler(const CacheDataHandler&) = delete;
+ CacheDataHandler& operator=(const CacheDataHandler&) = delete;
+
+ std::unordered_map<size_t, CacheHandle> m_CacheDataMap;
+};
+
+CacheDataHandler& CacheDataHandlerInstance();
+
+} // armnn_driver
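
HidlToken above is a fixed-size hidl_array of ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN (32) bytes. A hedged sketch of building one from raw bytes for use with the handler; MakeToken is an illustrative helper, not part of the driver:

    #include <algorithm>
    #include <cstring>

    // Hypothetical helper: zero a token and copy up to 32 raw bytes into it.
    armnn_driver::HidlToken MakeToken(const std::vector<uint8_t>& raw)
    {
        armnn_driver::HidlToken token;
        std::memset(token.data(), 0, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN);
        std::memcpy(token.data(), raw.data(),
                    std::min<size_t>(raw.size(), ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN));
        return token;
    }
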
diff --git a/ConversionUtils.cpp b/ConversionUtils.cpp
index 9cc6e286..c691c554 100644
--- a/ConversionUtils.cpp
+++ b/ConversionUtils.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -31,7 +31,11 @@ bool LayerInputHandle::IsValid() const
void LayerInputHandle::Connect(armnn::IInputSlot& inputSlot)
{
- ARMNN_ASSERT(IsValid());
+ if (!IsValid())
+ {
+ throw armnn::RuntimeException("LayerInputHandle is invalid");
+ }
+
if (m_OutputSlot)
{
m_OutputSlot->Connect(inputSlot);
@@ -40,7 +44,10 @@ void LayerInputHandle::Connect(armnn::IInputSlot& inputSlot)
void LayerInputHandle::Disconnect(armnn::IInputSlot& inputSlot)
{
- ARMNN_ASSERT(IsValid());
+ if (!IsValid())
+ {
+ throw armnn::RuntimeException("LayerInputHandle is invalid");
+ }
if (m_OutputSlot)
{
m_OutputSlot->Disconnect(inputSlot);
@@ -52,14 +59,31 @@ const armnn::TensorInfo& LayerInputHandle::GetTensorInfo() const
return m_TensorInfo;
}
+void LayerInputHandle::SanitizeQuantizationScale(LayerInputHandle& weight,
+ LayerInputHandle& input)
+{
+ if (m_OutputSlot)
+ {
+ armnn::TensorInfo weightInfo = weight.GetTensorInfo();
+ armnn::TensorInfo inputInfo = input.GetTensorInfo();
+ armnn::TensorInfo biasInfo = GetTensorInfo();
+
+ SanitizeBiasQuantizationScale(biasInfo, weightInfo, inputInfo);
+
+ m_TensorInfo = biasInfo;
+ m_OutputSlot->SetTensorInfo(biasInfo);
+ }
+}
+
ConstTensorPin::ConstTensorPin(bool optional)
: m_Optional(optional)
{}
-ConstTensorPin::ConstTensorPin(const armnn::TensorInfo& tensorInfo,
+ConstTensorPin::ConstTensorPin(armnn::TensorInfo& tensorInfo,
const void* valueStart,
uint32_t numBytes,
const armnn::PermutationVector& mappings)
+ : m_Optional(false)
{
armnn::IgnoreUnused(numBytes);
if (tensorInfo.GetNumBytes() != numBytes)
@@ -73,7 +97,7 @@ ConstTensorPin::ConstTensorPin(const armnn::TensorInfo& tensorInfo,
m_SwizzledTensorData.resize(tensorInfo.GetNumBytes());
SwizzleAndroidNn4dTensorToArmNn(tensorInfo, valueStart, m_SwizzledTensorData.data(), mappings);
- m_ConstTensor = armnn::ConstTensor(armnnUtils::Permuted(tensorInfo, mappings), m_SwizzledTensorData.data());
+ m_ConstTensor = armnn::ConstTensor(tensorInfo, m_SwizzledTensorData.data());
}
else
{
@@ -115,8 +139,11 @@ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo,
armnn::IConnectableLayer* prevLayer,
ConversionData& data)
{
- ARMNN_ASSERT(prevLayer->GetNumOutputSlots() == 1);
-
+ if (prevLayer->GetNumOutputSlots() != 1)
+ {
+ Fail("%s: Incorrect Number of OutputSlots expected 1 was %i", __func__, prevLayer->GetNumOutputSlots());
+ return nullptr;
+ }
prevLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
armnn::IConnectableLayer* activationLayer = prevLayer;
@@ -164,10 +191,12 @@ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo,
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsActivationSupported,
data.m_Backends,
isSupported,
+ setBackend,
prevLayer->GetOutputSlot(0).GetTensorInfo(),
tensorInfo,
activationDesc);
@@ -177,6 +206,7 @@ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo,
}
activationLayer = data.m_Network->AddActivationLayer(activationDesc);
+ activationLayer->SetBackendId(setBackend);
prevLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
activationLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp
index 439d4a4a..2e3a0424 100644
--- a/ConversionUtils.hpp
+++ b/ConversionUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,9 +8,7 @@
#include "Utils.hpp"
#include <armnn/ArmNN.hpp>
-#include <armnn/ILayerSupport.hpp>
#include <armnn/BackendHelper.hpp>
-#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
@@ -73,6 +71,9 @@ public:
const armnn::TensorInfo& GetTensorInfo() const;
+ void SanitizeQuantizationScale(LayerInputHandle& weight,
+ LayerInputHandle& input);
+
private:
armnn::IOutputSlot* m_OutputSlot;
bool m_Valid;
@@ -90,7 +91,7 @@ public:
// @param valueStart Start address of tensor data. Belongs to one of the memory pools associated with
// the model being converted.
// @param numBytes Number of bytes for the tensor data.
- ConstTensorPin(const armnn::TensorInfo& tensorInfo, const void* valueStart, uint32_t numBytes,
+ ConstTensorPin(armnn::TensorInfo& tensorInfo, const void* valueStart, uint32_t numBytes,
const armnn::PermutationVector& mappings);
ConstTensorPin(const ConstTensorPin& other) = delete;
@@ -137,7 +138,7 @@ static bool Fail(const char* formatStr, Args&&... args)
// Convenience macro to call an Is*Supported function and log caller name together with reason for lack of support.
// Called as: FORWARD_LAYER_SUPPORT_FUNC(__func__, Is*Supported, backends, a, b, c, d, e)
-#define FORWARD_LAYER_SUPPORT_FUNC(funcName, func, backends, supported, ...) \
+#define FORWARD_LAYER_SUPPORT_FUNC(funcName, func, backends, supported, setBackend, ...) \
try \
{ \
for (auto&& backendId : backends) \
@@ -150,6 +151,7 @@ try \
layerSupportObject.func(__VA_ARGS__, armnn::Optional<std::string&>(reasonIfUnsupported)); \
if (supported) \
{ \
+ setBackend = backendId; \
break; \
} \
else \
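
With the new setBackend out-parameter, FORWARD_LAYER_SUPPORT_FUNC reports which backend accepted the layer so the converter can pin the layer to it. A minimal caller sketch under the new signature, mirroring the converter hunks later in this diff (inputInfo, outputInfo and reshapeDescriptor are assumed to be in scope):

    bool isSupported = false;
    armnn::BackendId setBackend;
    FORWARD_LAYER_SUPPORT_FUNC(__func__,
                               IsReshapeSupported,
                               data.m_Backends,
                               isSupported,
                               setBackend,      // receives the first backend that reports support
                               inputInfo,
                               outputInfo,
                               reshapeDescriptor);
    if (isSupported)
    {
        armnn::IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
        layer->SetBackendId(setBackend);         // pin the layer to the backend that validated it
    }
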
@@ -278,7 +280,10 @@ armnn::IConnectableLayer& AddReshapeLayer(armnn::INetwork& network,
reshapeDescriptor.m_TargetShape = reshapeInfo.GetShape();
armnn::IConnectableLayer* reshapeLayer = network.AddReshapeLayer(reshapeDescriptor);
- ARMNN_ASSERT(reshapeLayer != nullptr);
+ if (!reshapeLayer)
+ {
+ throw armnn::RuntimeException("ReshapeLayer is null");
+ }
// Attach the input layer to the reshape layer
inputLayer.Connect(reshapeLayer->GetInputSlot(0));
@@ -292,7 +297,10 @@ bool BroadcastTensor(LayerInputHandle& input0,
armnn::IConnectableLayer* startLayer,
ConversionData& data)
{
- ARMNN_ASSERT(startLayer != nullptr);
+ if (!startLayer)
+ {
+ throw armnn::RuntimeException("StartLayer is null");
+ }
const armnn::TensorInfo& inputInfo0 = input0.GetTensorInfo();
const armnn::TensorInfo& inputInfo1 = input1.GetTensorInfo();
@@ -335,10 +343,12 @@ bool BroadcastTensor(LayerInputHandle& input0,
armnn::ReshapeDescriptor reshapeDescriptor;
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
smallInfo,
reshapedInfo,
reshapeDescriptor);
@@ -347,8 +357,13 @@ bool BroadcastTensor(LayerInputHandle& input0,
return false;
}
- ARMNN_ASSERT(data.m_Network != nullptr);
+ if (!data.m_Network)
+ {
+ throw armnn::RuntimeException("Network is null");
+ }
+
armnn::IConnectableLayer& reshapeLayer = AddReshapeLayer(*data.m_Network, smallInputHandle, reshapedInfo);
+ reshapeLayer.SetBackendId(setBackend);
if (input0IsSmaller)
{
@@ -473,7 +488,8 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(), UpdateBiasScaleValue);
biasInfo.SetQuantizationScales(biasScales);
- biasInfo.SetQuantizationDim(weightInfo.GetQuantizationDim());
+ // The bias is expected to be a 1D tensor, so set the quantization dim to 0.
+ biasInfo.SetQuantizationDim(0);
ALOGV("Bias quantization params have been updated for per-axis quantization");
}
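
For per-axis quantization the bias scales follow the usual NNAPI convention biasScale[i] = inputScale * weightScale[i], and the bias itself is a 1-D tensor, which is why the quantization dimension is forced to 0 above. A short sketch of that rule under this assumption (variable names are illustrative):

    // Assumes the standard convention biasScale[i] = inputScale * weightScale[i].
    const float inputScale = inputInfo.GetQuantizationScale();
    std::vector<float> biasScales = weightInfo.GetQuantizationScales();
    for (float& scale : biasScales)
    {
        scale *= inputScale;
    }
    biasInfo.SetQuantizationScales(biasScales);
    biasInfo.SetQuantizationDim(0);   // bias is 1-D, so axis 0
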
@@ -494,7 +510,7 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
// 4D Tensor Permutations
const armnn::PermutationVector IdentityPermutation4D({ 0U, 1U, 2U, 3U });
const armnn::PermutationVector IdentityPermutation3D({ 0U, 1U, 2U });
-const armnn::PermutationVector SwapDim1And2({ 0U, 2U, 1U, 3U });
+const armnn::PermutationVector SwapDim2And3({ 0U, 1U, 3U, 2U });
// 3D Permutation Vectors
const armnn::PermutationVector RotateTensorLeft({ 1U, 2U, 0U });
@@ -506,9 +522,10 @@ armnn::IConnectableLayer& AddTransposeLayer(armnn::INetwork& network, OSlot& inp
{
// Add swizzle layer
armnn::IConnectableLayer* const layer = network.AddTransposeLayer(mappings);
-
- ARMNN_ASSERT(layer != nullptr);
-
+ if (!layer)
+ {
+ throw armnn::RuntimeException("TransposeLayer is null");
+ }
// Connect input to swizzle layer
input.Connect(layer->GetInputSlot(0));
@@ -570,7 +587,8 @@ bool RequiresReshape(armnn::TensorShape & inputShape)
void SwizzleInputs(armnn::INetwork& network,
std::vector<LayerInputHandle>& inputs,
std::vector<armnn::TensorShape>& inputShapes,
- const armnn::PermutationVector& mapping)
+ const armnn::PermutationVector& mapping,
+ std::vector<armnn::BackendId>& setBackends)
{
if (!mapping.IsEqual(IdentityPermutation4D))
{
@@ -579,6 +597,7 @@ void SwizzleInputs(armnn::INetwork& network,
{
// add swizzle layer
armnn::IConnectableLayer& swizzleLayer = AddTransposeLayer(network, inputs[i], mapping);
+ swizzleLayer.SetBackendId(setBackends[i]);
auto& outputSlot = swizzleLayer.GetOutputSlot(0);
auto& outputInfo = outputSlot.GetTensorInfo();
// replace inputs with the swizzled ones
@@ -596,6 +615,7 @@ bool TransposeInputTensors(ConversionData& data,
// If we have a IdentityPermutation4D or IdentityPermutation3D then we are not permuting
if (!mapping.IsEqual(IdentityPermutation4D) && !mapping.IsEqual(IdentityPermutation3D))
{
+ std::vector<armnn::BackendId> setBackendsVec;
armnn::TensorInfo outputTransposeInfo;
size_t nInputs = inputs.size();
for (size_t i=0; i<nInputs; ++i)
@@ -606,20 +626,23 @@ bool TransposeInputTensors(ConversionData& data,
outputTransposeInfo = armnnUtils::TransposeTensorShape(inputs[i].GetTensorInfo(), mapping);
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputs[i].GetTensorInfo(),
outputTransposeInfo,
transposeDesc);
+ setBackendsVec.push_back(setBackend);
if (!isSupported)
{
return false;
}
}
- SwizzleInputs(*data.m_Network, inputs, inputShapes, mapping);
+ SwizzleInputs(*data.m_Network, inputs, inputShapes, mapping, setBackendsVec);
}
return true;
}
@@ -630,15 +653,19 @@ bool CreateConcatPermutationParameters(const unsigned int numberOfDimensions,
std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutationPair)
{
bool needPermute = false;
- ARMNN_ASSERT(numberOfDimensions >= 3);
+
+ if (numberOfDimensions < 3)
+ {
+ return Fail("%s: Invalid numberOfDimensions: %i < 3", __func__, numberOfDimensions);
+ }
// ArmNN uses Compute Library subtensors to perform concatenation
// This only works when concatenating along dimension 0, 1 or 3 for a 4-D tensor,
// or along dimension 0 or 2 for a 3-D tensor.
if (numberOfDimensions == 4 && concatDimension == 2)
{
- concatDimension = 1;
- permutationPair = std::make_pair(SwapDim1And2, SwapDim1And2);
+ concatDimension = 3;
+ permutationPair = std::make_pair(SwapDim2And3, SwapDim2And3);
needPermute = true;
}
else if (numberOfDimensions == 3 && concatDimension == 1)
@@ -696,13 +723,18 @@ const HalOperand* GetInputOperand(const HalOperation& operation,
{
if (failOnIndexOutOfBounds)
{
- Fail("%s: invalid input index: %i out of %i", __func__, inputIndex, operation.inputs.size());
+ Fail("%s: Invalid input index: %i out of %i", __func__, inputIndex, operation.inputs.size());
}
return nullptr;
}
// Model should have been validated beforehand
- ARMNN_ASSERT(operation.inputs[inputIndex] < getMainModel(model).operands.size());
+ if (operation.inputs[inputIndex] >= getMainModel(model).operands.size())
+ {
+ Fail("%s: invalid model index: %i >= %i", __func__, inputIndex, getMainModel(model).operands.size());
+ return nullptr;
+ }
+
return &getMainModel(model).operands[operation.inputs[inputIndex]];
}
@@ -721,8 +753,11 @@ const HalOperand* GetOutputOperand(const HalOperation& operation,
}
// Model should have been validated beforehand
- ARMNN_ASSERT(operation.outputs[outputIndex] < getMainModel(model).operands.size());
-
+ if (operation.outputs[outputIndex] >= getMainModel(model).operands.size())
+ {
+ Fail("%s: invalid model index: %i >= %i", __func__, outputIndex, getMainModel(model).operands.size());
+ return nullptr;
+ }
return &getMainModel(model).operands[operation.outputs[outputIndex]];
}
@@ -843,11 +878,9 @@ ConstTensorPin ConvertOperandToConstTensorPin(const HalOperand& operand,
}
armnn::TensorInfo tensorInfo = GetTensorInfoForOperand(operand);
- // Android datalayout might be different than armnn datalayout, e.g. the kernel for the depthwise convolution.
- if (tensorInfo.HasPerAxisQuantization())
- {
- tensorInfo.SetQuantizationDim(dimensionMappings[tensorInfo.GetQuantizationDim().value()]);
- }
+
+ // Make sure isConstant flag is set.
+ tensorInfo.SetConstant();
if (overrideTensorShape != nullptr)
{
@@ -1166,7 +1199,8 @@ template<typename HalPolicy,
LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
uint32_t inputIndex,
const HalModel& model,
- ConversionData& data)
+ ConversionData& data,
+ const armnn::PermutationVector& dimensionMappings = g_DontPermute)
{
using HalOperand = typename HalPolicy::Operand;
using HalOperandType = typename HalPolicy::OperandType;
@@ -1205,6 +1239,7 @@ LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
IsInputSupported,
data.m_Backends,
isInputSupported,
+ armnn::BackendId(),
operandTensorInfo);
if (!isInputSupported)
@@ -1229,14 +1264,18 @@ LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
case HalOperandLifeTime::CONSTANT_REFERENCE:
{
// The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
- ConstTensorPin tensorPin = ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data);
+ ConstTensorPin tensorPin =
+ ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data, dimensionMappings);
+
if (tensorPin.IsValid())
{
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConstantSupported,
data.m_Backends,
isSupported,
+ setBackend,
tensorPin.GetConstTensor().GetInfo());
if (!isSupported)
{
@@ -1245,17 +1284,18 @@ LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
armnn::IConnectableLayer* constantLayer =
data.m_Network->AddConstantLayer(tensorPin.GetConstTensor());
+ constantLayer->SetBackendId(setBackend);
armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0);
- outputSlot.SetTensorInfo(tensorPin.GetConstTensor().GetInfo());
+ armnn::TensorInfo constantTensorInfo = tensorPin.GetConstTensor().GetInfo();
+ outputSlot.SetTensorInfo(constantTensorInfo);
- return LayerInputHandle(true, &outputSlot, operandTensorInfo);
+ return LayerInputHandle(true, &outputSlot, constantTensorInfo);
}
else
{
Fail("%s: invalid operand tensor", __func__);
return LayerInputHandle();
}
- break;
}
default:
{
@@ -1279,7 +1319,8 @@ template<typename HalPolicy>
LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetworks::V1_3::Operation& operation,
uint32_t inputIndex,
const::android::hardware::neuralnetworks::V1_3::Model& model,
- ConversionData& data)
+ ConversionData& data,
+ const armnn::PermutationVector& dimensionMappings = g_DontPermute)
{
using HalOperand = typename HalPolicy::Operand;
using HalOperandType = typename HalPolicy::OperandType;
@@ -1332,6 +1373,7 @@ LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetw
IsInputSupported,
data.m_Backends,
isInputSupported,
+ armnn::BackendId(),
operandTensorInfo);
if (!isInputSupported)
@@ -1356,14 +1398,18 @@ LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetw
case HalOperandLifeTime::CONSTANT_REFERENCE:
{
// The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
- ConstTensorPin tensorPin = ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data);
+ ConstTensorPin tensorPin =
+ ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data, dimensionMappings);
+
if (tensorPin.IsValid())
{
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConstantSupported,
data.m_Backends,
isSupported,
+ setBackend,
tensorPin.GetConstTensor().GetInfo());
if (!isSupported)
{
@@ -1372,10 +1418,12 @@ LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetw
armnn::IConnectableLayer* constantLayer =
data.m_Network->AddConstantLayer(tensorPin.GetConstTensor());
+ constantLayer->SetBackendId(setBackend);
armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0);
- outputSlot.SetTensorInfo(tensorPin.GetConstTensor().GetInfo());
+ armnn::TensorInfo constantTensorInfo = tensorPin.GetConstTensor().GetInfo();
+ outputSlot.SetTensorInfo(constantTensorInfo);
- return LayerInputHandle(true, &outputSlot, operandTensorInfo);
+ return LayerInputHandle(true, &outputSlot, constantTensorInfo);
}
else
{
@@ -1439,7 +1487,7 @@ bool SetupAndTrackLayerOutputSlot(const HalOperation& operation,
// Type one dynamic tensors require the previous layer's output shape for inference
for (unsigned int inputSlotIndex = 0; inputSlotIndex < layer.GetNumInputSlots(); ++inputSlotIndex)
{
- if(!layer.GetInputSlot(inputSlotIndex).GetConnection())
+ if (!layer.GetInputSlot(inputSlotIndex).GetConnection())
{
return false;
}
@@ -1569,13 +1617,14 @@ bool ConvertToActivation(const HalOperation& operation,
const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsActivationSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outInfo,
activationDesc);
@@ -1596,7 +1645,11 @@ bool ConvertToActivation(const HalOperation& operation,
}
armnn::IConnectableLayer* layer = data.m_Network->AddActivationLayer(activationDesc);
- ARMNN_ASSERT(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ActivationLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1782,13 +1835,14 @@ bool ConvertPooling2d(const HalOperation& operation,
}
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPooling2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -1810,6 +1864,7 @@ bool ConvertPooling2d(const HalOperation& operation,
}
armnn::IConnectableLayer* pooling2dLayer = data.m_Network->AddPooling2dLayer(desc);
+ pooling2dLayer->SetBackendId(setBackend);
if (!pooling2dLayer)
{
return Fail("%s: AddPooling2dLayer failed", __func__);
@@ -1829,79 +1884,6 @@ bool ConvertPooling2d(const HalOperation& operation,
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
-bool ConvertAdd(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
- ActivationFn activationFunction;
- if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* outputOperand = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!outputOperand)
- {
- return false;
- }
-
- const armnn::TensorInfo& inputInfo0 = input0.GetTensorInfo();
- const armnn::TensorInfo& inputInfo1 = input1.GetTensorInfo();
-
- const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsAdditionSupported,
- data.m_Backends,
- isSupported,
- inputInfo0,
- inputInfo1,
- outputInfo);
- };
-
- if(!IsDynamicTensor(outputInfo))
- {
- validateFunc(outputInfo, isSupported);
- }
- else
- {
- isSupported = AreDynamicTensorsSupported();
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddAdditionLayer();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
-
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
bool ConvertArgMinMax(const HalOperation& operation,
const HalModel& model,
ConversionData& data,
@@ -1951,13 +1933,14 @@ bool ConvertArgMinMax(const HalOperation& operation,
descriptor.m_Axis = axis;
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsArgMinMaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo0,
outputInfo,
descriptor);
@@ -1978,8 +1961,11 @@ bool ConvertArgMinMax(const HalOperation& operation,
}
armnn::IConnectableLayer* layer = data.m_Network->AddArgMinMaxLayer(descriptor);
- assert(layer != nullptr);
-
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ArgMinMaxLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input0.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2082,10 +2068,12 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
reshapeDescriptor.m_TargetShape = reshapeInfo.GetShape();
bool isSupported = false;
+ armnn::BackendId setBackendReshape;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackendReshape,
operandInputHandle.GetTensorInfo(),
reshapeInfo,
reshapeDescriptor);
@@ -2095,6 +2083,7 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
return false;
}
armnn::IConnectableLayer& newReshape = AddReshapeLayer(*data.m_Network, operandInputHandle, reshapeInfo);
+ newReshape.SetBackendId(setBackendReshape);
// Point to the reshape operation rather then the input operation
operandShape = reshapeInfo.GetShape();
@@ -2110,7 +2099,11 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
}
}
- ARMNN_ASSERT(inputShapes.size() == inputHandles.size());
+ if (inputShapes.size() != inputHandles.size())
+ {
+ return Fail("%s: invalid model input shapes size doesn't match input handles size: %i != %i", __func__,
+ inputShapes.size(), inputHandles.size());
+ }
if (inputsHaveBeenReshaped)
{
@@ -2197,9 +2190,16 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
[](const LayerInputHandle& h)->const armnn::TensorInfo*{ return &h.GetTensorInfo(); });
bool isSupported = false;
+ armnn::BackendId setBackendConcat;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported){
- FORWARD_LAYER_SUPPORT_FUNC(__func__, IsConcatSupported, data.m_Backends, isSupported, inputTensorInfos,
- outputInfo, concatDescriptor);
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsConcatSupported,
+ data.m_Backends,
+ isSupported,
+ setBackendConcat,
+ inputTensorInfos,
+ outputInfo,
+ concatDescriptor);
};
if (!isDynamicTensor)
@@ -2217,15 +2217,24 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
}
armnn::IConnectableLayer* layer = data.m_Network->AddConcatLayer(concatDescriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ConcatLayer", __func__);
+ }
+ layer->SetBackendId(setBackendConcat);
layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
// Connect inputs to the layer
const int numInputSlots = layer->GetNumInputSlots();
- assert(static_cast<std::size_t>(numInputSlots) == inputHandles.size());
+
+ if (static_cast<std::size_t>(numInputSlots) != inputHandles.size())
+ {
+ return Fail("%s: invalid model input slots size doesn't match input handles size: %i != %i", __func__,
+ static_cast<std::size_t>(numInputSlots), inputHandles.size());
+ }
for (int i = 0; i < numInputSlots; ++i)
{
// connect the input directly to the merge (concat) layer
- inputHandles[static_cast<unsigned int>(i)].Connect(layer->GetInputSlot(i));
+ inputHandles[static_cast<unsigned int>(i)].Connect(layer->GetInputSlot(static_cast<unsigned int>(i)));
}
// Transpose the output shape
@@ -2236,10 +2245,12 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
armnn::TensorInfo outputTransposeInfo = armnnUtils::TransposeTensorShape(inputTransposeInfo,
permutationPair.second);
isSupported = false;
+ armnn::BackendId setBackendTranspose;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeSupported,
data.m_Backends,
isSupported,
+ setBackendTranspose,
inputTransposeInfo,
outputTransposeInfo,
transposeDesc);
@@ -2250,6 +2261,7 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
// Add permutation layer and connect the output to it, the permutation becomes the output layer
armnn::IConnectableLayer& deswizzleLayer = AddTransposeLayer(*data.m_Network, layer->GetOutputSlot(0),
permutationPair.second);
+ deswizzleLayer.SetBackendId(setBackendTranspose);
layer = &deswizzleLayer;
return true;
@@ -2265,7 +2277,10 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
if (isDynamicTensor)
{
// Infer the output shapes of concat if outputs are type 1 dynamic
- ARMNN_ASSERT(layer->GetOutputSlot(0).IsTensorInfoSet());
+ if (!layer->GetOutputSlot(0).IsTensorInfoSet())
+ {
+ return Fail("%s: TensorInfo is not set", __func__);
+ }
if (!ValidateConcatOutputShape(inputShapes,
layer->GetOutputSlot(0).GetTensorInfo().GetShape(),
concatDim))
@@ -2292,11 +2307,13 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
armnn::TensorInfo concatInfo = layer->GetOutputSlot(0).GetTensorInfo();
isSupported = false;
+ armnn::BackendId setBackendReshape2;
auto validateReshapeFunc = [&](const armnn::TensorInfo& afterConcatInfo, bool& isSupported){
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackendReshape2,
concatInfo,
afterConcatInfo,
reshapeDescriptor);
@@ -2316,6 +2333,7 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
return false;
}
layer = &AddReshapeLayer(*data.m_Network, layer->GetOutputSlot(0), afterConcatInfo);
+ layer->SetBackendId(setBackendReshape2);
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation,
0,
*layer,
@@ -2351,18 +2369,21 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
- // ArmNN does not currently support non-fixed weights or bias
- const ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 1, model, data);
- const ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
+ LayerInputHandle weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
- if (!weightsPin.IsValid() || !biasPin.IsValid())
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
return Fail("%s: Operation has invalid inputs", __func__);
}
- armnn::ConstTensor weights = weightsPin.GetConstTensor();
- armnn::ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
armnn::Convolution2dDescriptor desc;
desc.m_DataLayout = armnn::DataLayout::NHWC;
@@ -2392,8 +2413,8 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
return Fail("%s: Operation has invalid inputs", __func__);
}
- const uint32_t kernelX = weights.GetShape()[2];
- const uint32_t kernelY = weights.GetShape()[1];
+ const uint32_t kernelX = weightsInfo.GetShape()[2];
+ const uint32_t kernelY = weightsInfo.GetShape()[1];
const uint32_t inputX = inputInfo.GetShape()[2];
const uint32_t inputY = inputInfo.GetShape()[1];
@@ -2406,19 +2427,21 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
}
desc.m_BiasEnabled = true;
- armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
+ armnn::Optional<armnn::TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -2436,8 +2459,8 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
return false;
}
- armnn::IConnectableLayer* startLayer =
- data.m_Network->AddConvolution2dLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddConvolution2dLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
@@ -2446,6 +2469,10 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
input.Connect(startLayer->GetInputSlot(0));
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
}
@@ -2484,7 +2511,7 @@ bool ConvertDepthToSpace(const HalOperation& operation, const HalModel& model, C
GetInputScalar<HalPolicy>(operation, 1, HalOperandType::INT32, descriptor.m_BlockSize, model, data);
if (descriptor.m_BlockSize <= 1)
{
- return Fail("%s: Block size must be at least 1 in all dimensions");
+ return Fail("%s: Block size must be at least 1 in all dimensions", __func__);
}
descriptor.m_DataLayout = armnn::DataLayout::NHWC;
@@ -2494,12 +2521,14 @@ bool ConvertDepthToSpace(const HalOperation& operation, const HalModel& model, C
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthToSpaceSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -2520,7 +2549,11 @@ bool ConvertDepthToSpace(const HalOperation& operation, const HalModel& model, C
}
armnn::IConnectableLayer* const layer = data.m_Network->AddDepthToSpaceLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the DepthToSpaceLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2554,10 +2587,9 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
// ArmNN does not currently support non-fixed weights or bias
// Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
const HalOperand* weightsOperand = GetInputOperand<HalPolicy>(operation, 1, model);
-
- if (weightsOperand == nullptr)
+ if (!weightsOperand)
{
- return Fail("%s: Operand is invalid", __func__);
+ return Fail("%s: Could not read weights", __func__);
}
// Basic sanity check on the weights shape.
// ANEURALNETWORKS_DEPTHWISE_CONV_2D specifies a 4-D tensor, of shape
@@ -2570,34 +2602,27 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
armnn::DepthwiseConvolution2dDescriptor desc;
desc.m_DataLayout = armnn::DataLayout::NHWC;
- // Reinterpret weight data as [ H, W, I, M ]
- armnn::TensorShape weightsShape({ weightsOperand->dimensions[1],
- weightsOperand->dimensions[2],
- inputInfo.GetShape()[3],
- weightsOperand->dimensions[3] / inputInfo.GetShape()[3] });
-
- // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ]
- const armnn::PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U };
-
- const ConstTensorPin weightsPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
- 1,
- model,
- data,
- HWIMToMIHW,
- &weightsShape);
+ LayerInputHandle weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
- // Bias is a 1D tensor
- const ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
+ const HalOperand* biasOperand = GetInputOperand<HalPolicy>(operation, 2, model);
+ if (!biasOperand)
+ {
+ return Fail("%s: Could not read bias", __func__);
+ }
- if (!weightsPin.IsValid() || !biasPin.IsValid())
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
return Fail("%s: Operation has invalid inputs", __func__);
}
- armnn::ConstTensor weights = weightsPin.GetConstTensor();
- armnn::ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
ActivationFn activation;
@@ -2625,8 +2650,8 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
return Fail("%s: Operation has invalid inputs", __func__);
}
- const uint32_t kernelX = weights.GetShape()[3];
- const uint32_t kernelY = weights.GetShape()[2];
+ const uint32_t kernelX = weightsInfo.GetShape()[2];
+ const uint32_t kernelY = weightsInfo.GetShape()[1];
const uint32_t inputX = inputInfo.GetShape()[2];
const uint32_t inputY = inputInfo.GetShape()[1];
@@ -2639,19 +2664,21 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
}
desc.m_BiasEnabled = true;
- armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
+ armnn::Optional<armnn::TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthwiseConvolutionSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -2670,8 +2697,8 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
return false;
}
- armnn::IConnectableLayer* startLayer =
- data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddDepthwiseConvolution2dLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
return Fail("%s: AddDepthwiseConvolution2dLayer failed", __func__);
@@ -2679,6 +2706,10 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
input.Connect(startLayer->GetInputSlot(0));
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
}
@@ -2712,12 +2743,14 @@ bool ConvertDequantize(const HalOperation& operation, const HalModel& model, Con
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDequantizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo);
};
@@ -2737,7 +2770,11 @@ bool ConvertDequantize(const HalOperation& operation, const HalModel& model, Con
}
armnn::IConnectableLayer* const layer = data.m_Network->AddDequantizeLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the DequantizeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2746,10 +2783,16 @@ bool ConvertDequantize(const HalOperation& operation, const HalModel& model, Con
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
-bool ConvertDiv(const HalOperation& operation, const HalModel& model, ConversionData& data)
+bool ConvertElementwiseBinary(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation)
{
using HalOperand = typename HalPolicy::Operand;
+ ALOGV("HalPolicy::ConvertElementwiseBinary()");
+ ALOGV("binaryOperation = %s", GetBinaryOperationAsCString(binaryOperation));
+
LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
@@ -2758,35 +2801,38 @@ bool ConvertDiv(const HalOperation& operation, const HalModel& model, Conversion
return Fail("%s: Operation has invalid inputs", __func__);
}
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
+ // The FuseActivation parameter is always the input index 2, and it should be optional
ActivationFn activationFunction;
if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
{
- return Fail("%s: Operation has invalid inputs", __func__);
+ return Fail("%s: Operation has invalid optional input: activation function", __func__);
}
const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
if (!output)
{
- return Fail("%s: Could not read output 0", __func__);
+ return Fail("%s: Could not read output", __func__);
}
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+ armnn::ElementwiseBinaryDescriptor descriptor(binaryOperation);
+
bool isSupported = false;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsDivisionSupported,
+ IsElementwiseBinarySupported,
data.m_Backends,
isSupported,
+ armnn::BackendId(),
input0.GetTensorInfo(),
input1.GetTensorInfo(),
- outputInfo);
+ outputInfo,
+ binaryOperation);
};
- if(!IsDynamicTensor(outputInfo))
+ if (!IsDynamicTensor(outputInfo))
{
validateFunc(outputInfo, isSupported);
}
@@ -2800,19 +2846,22 @@ bool ConvertDiv(const HalOperation& operation, const HalModel& model, Conversion
return false;
}
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddDivisionLayer();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
+ armnn::IConnectableLayer* layer = data.m_Network->AddElementwiseBinaryLayer(descriptor);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ElementwiseBinaryLayer", __func__);
+ }
+ bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
if (!isReshapeSupported)
{
return false;
}
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
-
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc,
+ activationFunction);
}
+
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
@@ -2835,12 +2884,14 @@ bool ConvertFloor(const HalOperation& operation, const HalModel& model, Conversi
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsFloorSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo);
};
@@ -2860,7 +2911,11 @@ bool ConvertFloor(const HalOperation& operation, const HalModel& model, Conversi
}
armnn::IConnectableLayer* layer = data.m_Network->AddFloorLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the FloorLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2945,7 +3000,11 @@ DequantizeResult DequantizeIfRequired(size_t operand_index,
}
const HalOperand* operand = GetInputOperand<HalPolicy>(operationIt, 0, model);
- ARMNN_ASSERT(operand);
+
+ if (!operand)
+ {
+ return { nullptr, 0, armnn::TensorInfo(), DequantizeStatus::INVALID_OPERAND };
+ }
if (!IsQSymm8(*operand))
{
@@ -2969,8 +3028,12 @@ DequantizeResult DequantizeIfRequired(size_t operand_index,
for (size_t i = 0; i < dequantizedBufferLength; ++i)
{
float* dstPtr = dequantizedBuffer.get();
- ARMNN_ASSERT(dstPtr);
- *dstPtr++ = quantizedBuffer[i] * quantizationScale;
+
+ if (!dstPtr)
+ {
+ return { nullptr, 0, armnn::TensorInfo(), DequantizeStatus::INVALID_OPERAND };
+ }
+ dstPtr[i] = quantizedBuffer[i] * quantizationScale;
}
// Construct tensor info for dequantized ConstTensor
@@ -3049,26 +3112,12 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
return Fail("%s: Could not read weights", __func__);
}
- const armnn::TensorInfo& weightsInfo = GetTensorInfoForOperand(*weightsOperand);
- bool constantWeights = IsOperandConstant<HalPolicy>(*weightsOperand);
-
- armnn::Optional<armnn::ConstTensor> optionalWeights = armnn::EmptyOptional();
- if (!constantWeights)
- {
- weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
- if (!weightsInput.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
- }
- else
+ // If the weights are constant, a separate constant layer will be created to store the data.
+ // Otherwise, the non-const weights are handled as inputs.
+ weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
{
- ConstTensorPin weightsPin = DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 1);
- if (!weightsPin.IsValid())
- {
- return Fail("%s: Operation has invalid weights", __func__);
- }
- optionalWeights = armnn::Optional<armnn::ConstTensor>(weightsPin.GetConstTensor());
+ return Fail("%s: Operation has invalid inputs", __func__);
}
LayerInputHandle biasInput = LayerInputHandle();
@@ -3077,33 +3126,16 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
{
return Fail("%s: Could not read bias", __func__);
}
- armnn::TensorInfo biasInfo = GetTensorInfoForOperand(*biasOperand);
- bool constantBias = IsOperandConstant<HalPolicy>(*biasOperand);
-
- armnn::Optional<armnn::ConstTensor> optionalBias = armnn::EmptyOptional();
- if (!constantBias)
- {
- biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data);
- if (!biasInput.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
- }
- else
- {
- ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data); // 1D
- if (!biasPin.IsValid())
- {
- return Fail("%s: Operation has invalid bias", __func__);
- }
- optionalBias = armnn::Optional<armnn::ConstTensor>(biasPin.GetConstTensor());
- }
- if ((constantWeights && !constantBias) || (!constantWeights && constantBias))
+ // If the bias is constant, a separate constant layer will be created to store the data.
+ // Otherwise, the non-const bias is handled as an input.
+ biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
- return Fail("%s: Non-compatible weights and bias", __func__);
+ return Fail("%s: Operation has invalid inputs", __func__);
}
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
armnn::TensorInfo reshapedInfo = inputInfo;
try
{
@@ -3114,7 +3146,8 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
return Fail("%s: %s", __func__, e.what());
}
- // ensuring that the bias value is within 1% of the weights input (small float differences can exist)
+ // Ensuring that the bias value is within 1% of the weights input (small float differences can exist)
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
SanitizeBiasQuantizationScale(biasInfo, weightsInfo, reshapedInfo);
ActivationFn activationFunction;
@@ -3126,9 +3159,10 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
armnn::FullyConnectedDescriptor desc;
desc.m_TransposeWeightMatrix = true;
desc.m_BiasEnabled = true;
- desc.m_ConstantWeights = constantWeights;
+ desc.m_ConstantWeights = IsOperandConstant<HalPolicy>(*weightsOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
if (!VerifyFullyConnectedShapes(reshapedInfo.GetShape(),
@@ -3145,6 +3179,7 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
IsFullyConnectedSupported,
data.m_Backends,
isSupported,
+ setBackend,
reshapedInfo,
outputInfo,
weightsInfo,
@@ -3166,10 +3201,9 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
return false;
}
- armnn::IConnectableLayer* startLayer =
- data.m_Network->AddFullyConnectedLayer(desc,
- optionalWeights,
- optionalBias);
+    // Add FullyConnected layer. Weights and bias will be connected as constant layers or non-const inputs.
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddFullyConnectedLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (inputInfo.GetNumDimensions() > 2U)
{
@@ -3177,7 +3211,10 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
reshapeDescriptor.m_TargetShape = reshapedInfo.GetShape();
armnn::IConnectableLayer* reshapeLayer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
- assert(reshapeLayer != nullptr);
+ if (!reshapeLayer)
+ {
+ return Fail("%s: could not add the reshapeLayer", __func__);
+ }
input.Connect(reshapeLayer->GetInputSlot(0));
reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0));
@@ -3187,12 +3224,9 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
input.Connect(startLayer->GetInputSlot(0));
}
- // connect weights input
- if (!desc.m_ConstantWeights)
- {
- weightsInput.Connect(startLayer->GetInputSlot(1));
- biasInput.Connect(startLayer->GetInputSlot(2));
- }
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activationFunction);
@@ -3234,12 +3268,14 @@ bool ConvertL2Normalization(const HalOperation& operation, const HalModel& model
desc.m_DataLayout = armnn::DataLayout::NHWC;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsL2NormalizationSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -3260,7 +3296,11 @@ bool ConvertL2Normalization(const HalOperation& operation, const HalModel& model
}
armnn::IConnectableLayer* layer = data.m_Network->AddL2NormalizationLayer(desc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the L2NormalizationLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3320,12 +3360,14 @@ bool ConvertLocalResponseNormalization(const HalOperation& operation,
descriptor.m_NormSize = 1 + (2 * descriptor.m_NormSize);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsNormalizationSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3345,9 +3387,12 @@ bool ConvertLocalResponseNormalization(const HalOperation& operation,
return false;
}
-
armnn::IConnectableLayer* layer = data.m_Network->AddNormalizationLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the NormalizationLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3418,12 +3463,14 @@ bool ConvertMean(const HalOperation& operation, const HalModel& model, Conversio
descriptor.m_KeepDims = keepDims > 0;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsMeanSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3444,80 +3491,14 @@ bool ConvertMean(const HalOperation& operation, const HalModel& model, Conversio
}
armnn::IConnectableLayer* const layer = data.m_Network->AddMeanLayer(descriptor);
- assert(layer != nullptr);
- input.Connect(layer->GetInputSlot(0));
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
-bool ConvertMul(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
- ActivationFn activationFunction;
- if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* outputOperand = GetOutputOperand<HalPolicy>(operation, 0, model);
-
- if (outputOperand == nullptr)
+    if (!layer)
{
- return false;
- }
-
- const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsMultiplicationSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outputInfo);
- };
-
- if(!IsDynamicTensor(outputInfo))
- {
- validateFunc(outputInfo, isSupported);
- }
- else
- {
- isSupported = AreDynamicTensorsSupported();
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddMultiplicationLayer();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
- if (!isReshapeSupported)
- {
- return false;
+ return Fail("%s: Could not add the MeanLayer", __func__);
}
+    layer->SetBackendId(setBackend);
+    input.Connect(layer->GetInputSlot(0));
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
template<typename HalPolicy,
@@ -3561,12 +3542,14 @@ bool ConvertPad(HalOperation& operation, const HalModel& model, ConversionData&
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPadSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3587,7 +3570,11 @@ bool ConvertPad(HalOperation& operation, const HalModel& model, ConversionData&
}
armnn::IConnectableLayer* const layer = data.m_Network->AddPadLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the PadLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3646,12 +3633,14 @@ bool ConvertReshape(const HalOperation& operation, const HalModel& model, Conver
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
reshapeDescriptor);
@@ -3672,78 +3661,14 @@ bool ConvertReshape(const HalOperation& operation, const HalModel& model, Conver
}
armnn::IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
- assert(layer != nullptr);
- input.Connect(layer->GetInputSlot(0));
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
-bool ConvertSub(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
+    if (!layer)
{
- return Fail("%s: Operation has invalid inputs", __func__);
+ return Fail("%s: Could not add the ReshapeLayer", __func__);
}
+    layer->SetBackendId(setBackend);
+    input.Connect(layer->GetInputSlot(0));
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
- ActivationFn activationFunction;
- if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!output)
- {
- return Fail("%s: Could not read output 0", __func__);
- }
-
- const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsSubtractionSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outputInfo);
- };
-
- if(IsDynamicTensor(outputInfo))
- {
- isSupported = AreDynamicTensorsSupported();
- }
- else
- {
- validateFunc(outputInfo, isSupported);
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddSubtractionLayer();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
template<typename HalPolicy,
@@ -3781,13 +3706,13 @@ bool ConvertSqueeze(const HalOperation& operation, const HalModel& model, Conver
// if the operand index is out of bounds.
const HalOperand* axisOperand = GetInputOperand<HalPolicy>(operation, 1, model, false);
- const uint32_t dimensionSequence[] = { 0, 1, 2, 3 };
-
std::vector<int32_t> axis;
if (!axisOperand)
{
- axis.assign(dimensionSequence,
- dimensionSequence + rank);
+ for (unsigned int i = 0; i < rank; ++i)
+ {
+            axis.push_back(static_cast<int32_t>(i));
+ }
}
else if (!GetTensorInt32Values<HalPolicy>(*axisOperand, axis, model, data))
{
@@ -3814,10 +3739,12 @@ bool ConvertSqueeze(const HalOperation& operation, const HalModel& model, Conver
reshapeDesc.m_TargetShape = outputInfo.GetShape();
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
reshapeDesc);
@@ -3828,7 +3755,11 @@ bool ConvertSqueeze(const HalOperation& operation, const HalModel& model, Conver
}
armnn::IConnectableLayer* const layer = data.m_Network->AddReshapeLayer(reshapeDesc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ReshapeLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data);
@@ -3914,12 +3845,14 @@ bool ConvertStridedSlice(const HalOperation& operation, const HalModel& model, C
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsStridedSliceSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3963,7 +3896,11 @@ bool ConvertStridedSlice(const HalOperation& operation, const HalModel& model, C
}
armnn::IConnectableLayer* const layer = data.m_Network->AddStridedSliceLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the StridedSliceLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -4021,12 +3958,14 @@ bool ConvertTranspose(const HalOperation& operation, const HalModel& model, Conv
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
transposeDesc);
@@ -4047,7 +3986,11 @@ bool ConvertTranspose(const HalOperation& operation, const HalModel& model, Conv
}
armnn::IConnectableLayer* const layer = data.m_Network->AddTransposeLayer(transposeDesc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the TransposeLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -4115,12 +4058,14 @@ bool ConvertBatchToSpaceNd(const HalOperation& operation,
batchToSpaceNdDesc.m_Crops = {{0, 0}, {0, 0}};
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsBatchToSpaceNdSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
batchToSpaceNdDesc);
@@ -4142,7 +4087,11 @@ bool ConvertBatchToSpaceNd(const HalOperation& operation,
}
armnn::IConnectableLayer* const layer = data.m_Network->AddBatchToSpaceNdLayer(batchToSpaceNdDesc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the BatchToSpaceNdLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -4217,7 +4166,8 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model,
return Fail("%s: Operation has invalid paddings operand, invalid padding values.", __func__);
}
- paddingList.emplace_back((unsigned int) paddingBeforeInput, (unsigned int) paddingAfterInput);
+ paddingList.emplace_back(static_cast<unsigned int>(paddingBeforeInput),
+ static_cast<unsigned int>(paddingAfterInput));
}
armnn::SpaceToBatchNdDescriptor descriptor;
@@ -4231,12 +4181,14 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model,
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSpaceToBatchNdSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -4257,7 +4209,11 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model,
}
armnn::IConnectableLayer* const layer = data.m_Network->AddSpaceToBatchNdLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the SpaceToBatchNdLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
diff --git a/ConversionUtils_1_2.hpp b/ConversionUtils_1_2.hpp
index cddb13d7..00c891ae 100644
--- a/ConversionUtils_1_2.hpp
+++ b/ConversionUtils_1_2.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -23,6 +23,39 @@ using namespace armnn;
using namespace android::nn;
template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool IsWeightsValid(const HalOperation& operation,
+ uint32_t inputIndex,
+ const HalModel& model,
+ const bool isOptional = true)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandLifeTime = typename HalPolicy::OperandLifeTime;
+ const HalOperand* operand = GetInputOperand<HalPolicy>(operation, inputIndex, model);
+ if (!operand)
+ {
+ Fail("%s: failed to get input operand %i", __func__, inputIndex);
+ return false;
+ }
+
+ // If the operand is not an optional operand it cannot have a NO_VALUE lifetime
+ if (!isOptional && operand->lifetime == HalOperandLifeTime::NO_VALUE)
+ {
+ return false;
+ }
+
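+    // Only constant weights (CONSTANT_COPY or CONSTANT_REFERENCE) or an omitted optional operand (NO_VALUE) are accepted.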
+ if (operand->lifetime != HalOperandLifeTime::CONSTANT_COPY
+ && operand->lifetime != HalOperandLifeTime::CONSTANT_REFERENCE
+ && operand->lifetime != HalOperandLifeTime::NO_VALUE)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
bool IsQSymmDequantizeForWeights(const HalOperation& operation, const HalModel& model)
@@ -110,6 +143,157 @@ bool SetupAndTrackLayerOutputSlotAndOverrideTensorInfo(const HalOperation& opera
}
template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool ConvertCast(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+
+ ALOGV("HalPolicy::ConvertCast()");
+
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
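+    // Check backend support and record which backend accepted the layer so it can be pinned via SetBackendId.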
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsCastSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputInfo);
+ };
+
+ if(!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ IConnectableLayer* layer = data.m_Network->AddCastLayer();
+    if (!layer)
+    {
+        return Fail("%s: Could not add the CastLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool ConvertChannelShuffle(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandType = typename HalPolicy::OperandType;
+
+ ALOGV("HalPolicy::ConvertChannelShuffle()");
+
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ auto inputDimensions = static_cast<int32_t>(input.GetTensorInfo().GetNumDimensions());
+
+ ChannelShuffleDescriptor descriptor;
+
+ int32_t groups;
+ if (!GetInputScalar<HalPolicy>(operation, 1, HalOperandType::INT32, groups, model, data))
+ {
+ return Fail("%s: Operation has invalid or unsupported number of groups operand", __func__);
+ }
+ descriptor.m_NumGroups = static_cast<uint32_t>(groups);
+
+ int32_t axis;
+ if (!GetInputScalar<HalPolicy>(operation, 2, HalOperandType::INT32, axis, model, data))
+ {
+ return Fail("%s: Operation has invalid or unsupported dimension channel shuffle operand", __func__);
+ }
+ if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0)))
+ {
+ return Fail("%s: Operation has invalid dimension: %d. It is out of bounds [-%d, %d))", __func__, axis,
+ inputDimensions, inputDimensions);
+ }
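+    // Negative axis values count back from the last dimension, so wrap them to the equivalent positive axis.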
+ int positiveAxis = (axis < 0) ? inputDimensions + axis : axis;
+ descriptor.m_Axis = static_cast<uint32_t>(positiveAxis);
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsChannelShuffleSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputInfo,
+ descriptor);
+ };
+
+ if(!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ IConnectableLayer* layer = data.m_Network->AddChannelShuffleLayer(descriptor);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ChannelShuffleLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
bool ConvertComparison_1_2(const HalOperation& operation,
@@ -143,12 +327,14 @@ bool ConvertComparison_1_2(const HalOperation& operation,
ComparisonDescriptor descriptor(comparisonOperation);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsComparisonSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo0,
inputInfo1,
outputInfo,
@@ -171,7 +357,11 @@ bool ConvertComparison_1_2(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddComparisonLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ComparisonLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
if (!isReshapeSupported)
@@ -179,12 +369,6 @@ bool ConvertComparison_1_2(const HalOperation& operation,
return false;
}
- if(IsDynamicTensor(outputInfo))
- {
- input0.Connect(layer->GetInputSlot(0));
- input1.Connect(layer->GetInputSlot(1));
- }
-
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
@@ -237,26 +421,30 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
// The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in] but ArmNN expects the
// filter's height and width indices to match the input's height and width indices so we permute it to OIHW if
// the DataLayout is NCHW
- const ConstTensorPin weightsPin = (desc.m_DataLayout == DataLayout::NCHW) ?
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 1,
- model, data, OHWIToOIHW) :
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 1, model, data);
- const ConstTensorPin biasPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
- if (!weightsPin.IsValid())
+
+ if (!IsWeightsValid<HalPolicy>(operation, 1, model, false) && desc.m_DataLayout == DataLayout::NCHW)
{
- return Fail("%s: Operation has invalid weights", __func__);
+ return Fail("%s: Operation has unsupported weights HalOperandLifeTime", __func__);
}
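+    // Weights and bias are handled as layer inputs here; constant operands are supplied through constant layers rather than being baked into the convolution layer.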
+ LayerInputHandle weightsInput = (desc.m_DataLayout == DataLayout::NCHW) ?
+ ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data, OHWIToOIHW) :
+ ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
- if (!biasPin.IsValid())
+ if (!weightsInput.IsValid())
{
- return Fail("%s: Operation has invalid biases", __func__);
+ return Fail("%s: Operation has invalid inputs", __func__);
}
- ConstTensor weights = weightsPin.GetConstTensor();
- ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
ActivationFn activation;
@@ -275,8 +463,8 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout);
unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
- const uint32_t kernelX = weights.GetShape()[widthIndex];
- const uint32_t kernelY = weights.GetShape()[heightIndex];
+ const uint32_t kernelX = weightsInfo.GetShape()[widthIndex];
+ const uint32_t kernelY = weightsInfo.GetShape()[heightIndex];
const uint32_t inputX = inputInfo.GetShape()[widthIndex];
const uint32_t inputY = inputInfo.GetShape()[heightIndex];
@@ -305,19 +493,21 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
}
desc.m_BiasEnabled = true;
- Optional<TensorInfo> biases(bias.GetInfo());
+ Optional<TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -335,8 +525,8 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
return false;
}
- IConnectableLayer* startLayer =
- data.m_Network->AddConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddConvolution2dLayer(desc);
if (!startLayer)
{
@@ -344,6 +534,8 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
}
input.Connect(startLayer->GetInputSlot(0));
+    startLayer->SetBackendId(setBackend);
+    weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
@@ -377,14 +569,15 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
// ArmNN does not currently support non-fixed weights or bias
+ if (!IsWeightsValid<HalPolicy>(operation, 1, model, false))
+ {
+ return Fail("%s: This Operation has unsupported weights HalOperandLifeTime", __func__);
+ }
+
// Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
const HalOperand* weightsOperand = GetInputOperand<HalPolicy>(operation, 1, model);
- if (weightsOperand == nullptr)
- {
- return Fail("%s: Operand is invalid", __func__);
- }
- if ( weightsOperand->dimensions[0] != 1)
+ if (weightsOperand->dimensions[0] != 1)
{
return Fail("%s: Invalid weights; for depthwise convolution, dimension 0 must be 1 but it is %i",
__func__, weightsOperand->dimensions[0] );
@@ -403,44 +596,30 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
desc.m_DataLayout = OptionalDataLayout<HalPolicy>(operation, dataLayoutFlagIndex, model, data);
armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout);
- unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
- // Reinterpret weight data as [ H, W, I, M ]
- TensorShape weightsShape({ weightsOperand->dimensions[1],
- weightsOperand->dimensions[2],
- inputInfo.GetShape()[channelsIndex],
- weightsOperand->dimensions[3] / inputInfo.GetShape()[channelsIndex] });
-
- // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ]
- const PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U };
-
- const ConstTensorPin weightsPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
- 1,
- model,
- data,
- HWIMToMIHW,
- &weightsShape);
-
- // Bias is a 1D tensor
- const ConstTensorPin biasPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
+ LayerInputHandle weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
- if (!weightsPin.IsValid())
+ const HalOperand* biasOperand = GetInputOperand<HalPolicy>(operation, 2, model);
+ if (!biasOperand)
{
- return Fail("%s: Operation has invalid weights", __func__);
+ return Fail("%s: Could not read bias", __func__);
}
- if (!biasPin.IsValid())
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
- return Fail("%s: Operation has invalid biases", __func__);
+ return Fail("%s: Operation has invalid inputs", __func__);
}
- ConstTensor weights = weightsPin.GetConstTensor();
- ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
ActivationFn activation;
@@ -456,8 +635,8 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
return Fail("%s: Operation has invalid inputs (implicit padding)", __func__);
}
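+    // The NNAPI depthwise weights tensor is laid out as [ 1, H, W, I * M ], so dimension 1 is the kernel height and dimension 2 the kernel width.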
- const uint32_t kernelX = weights.GetShape()[3];
- const uint32_t kernelY = weights.GetShape()[2];
+ const uint32_t kernelX = weightsInfo.GetShape()[2];
+ const uint32_t kernelY = weightsInfo.GetShape()[1];
const uint32_t inputX = inputInfo.GetShape()[widthIndex];
const uint32_t inputY = inputInfo.GetShape()[heightIndex];
@@ -485,19 +664,21 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
}
desc.m_BiasEnabled = true;
- Optional<TensorInfo> biases(bias.GetInfo());
+ Optional<TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthwiseConvolutionSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -515,8 +696,8 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
return false;
}
- IConnectableLayer* startLayer =
- data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddDepthwiseConvolution2dLayer(desc);
if (!startLayer)
{
@@ -525,6 +706,10 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
input.Connect(startLayer->GetInputSlot(0));
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
}
@@ -578,13 +763,14 @@ bool ConvertElementwiseUnary(const HalOperation& operation,
ElementwiseUnaryDescriptor descriptor(unaryOperation);
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsElementwiseUnarySupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -605,7 +791,11 @@ bool ConvertElementwiseUnary(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddElementwiseUnaryLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ElementwiseUnaryLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -657,12 +847,14 @@ bool ConvertExpandDims(const HalOperation& operation, const HalModel& model, Con
reshapeDescriptor.m_TargetShape = targetShape;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
reshapeDescriptor);
@@ -687,7 +879,11 @@ bool ConvertExpandDims(const HalOperation& operation, const HalModel& model, Con
}
IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ReshapeLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -735,7 +931,8 @@ bool ConvertGather(const HalOperation& operation, const HalModel& model, Convers
{
return Fail("%s: Operation has invalid or unsupported axis operand", __func__);
}
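+    // A valid axis for a rank-n input lies in [-n, n); anything outside that range is rejected.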
- if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0)))
+ int32_t inputDimensions_int = static_cast<int32_t>(inputDimensions);
+ if ((axis < -inputDimensions_int) || (inputDimensions_int <= axis))
{
return Fail("%s: Operation has invalid axis: %d. It is out of bounds [-%d, %d))", __func__, axis,
inputDimensions, inputDimensions);
@@ -745,12 +942,14 @@ bool ConvertGather(const HalOperation& operation, const HalModel& model, Convers
desc.m_Axis = axis;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsGatherSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
indices.GetTensorInfo(),
outputInfo,
@@ -772,7 +971,11 @@ bool ConvertGather(const HalOperation& operation, const HalModel& model, Convers
}
IConnectableLayer* layer = data.m_Network->AddGatherLayer(desc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the GatherLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
indices.Connect(layer->GetInputSlot(1));
@@ -840,7 +1043,6 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
const TensorShape& inputShape = inputInfo.GetShape();
const TensorShape& outputShape = outputInfo.GetShape();
const TensorShape& weightsShape = weights.GetShape();
- const TensorShape& biasesShape = biases.GetShape();
armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
@@ -851,7 +1053,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
desc.m_DataLayout = dataLayout;
desc.m_BiasEnabled = true;
- int numGroups;
+ unsigned int numGroups;
ActivationFn activation;
if (operation.inputs.size() == 12)
@@ -929,6 +1131,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
std::vector<std::reference_wrapper<TensorInfo>> splitterOutputInfos(numGroups, std::ref(splitterOutputInfo));
ViewsDescriptor splitterDesc(numGroups);
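+    // The input is split along the channels dimension, giving one view per group.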
+ splitterDesc.SetAxis(armnn::numeric_cast<int32_t>(channelsIndex));
for (unsigned int group = 0u; group < numGroups; ++group)
{
splitterDesc.SetViewOriginCoord(group, channelsIndex, splitterDimSizes[channelsIndex] * group);
@@ -939,10 +1142,12 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
bool isSupported = false;
+ armnn::BackendId setBackendSplit;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSplitterSupported,
data.m_Backends,
isSupported,
+ setBackendSplit,
inputInfo,
splitterOutputInfos,
splitterDesc);
@@ -952,6 +1157,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
IConnectableLayer* splitterLayer = data.m_Network->AddSplitterLayer(splitterDesc);
+ splitterLayer->SetBackendId(setBackendSplit);
if (!splitterLayer)
{
return Fail("%s: Failed to add SplitterLayer", __func__);
@@ -1035,12 +1241,14 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
biasesDataOffset));
isSupported = false;
+ armnn::BackendId setBackendConv;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackendConv,
groupInputInfo,
outputInfo,
desc,
@@ -1062,14 +1270,22 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
return false;
}
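+    // Per-group weights and biases are added as constant layers feeding the convolution's weight and bias input slots.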
- IConnectableLayer* convLayer =
- data.m_Network->AddConvolution2dLayer(desc, groupWeights, Optional<ConstTensor>(groupBiases));
+ IConnectableLayer* weightsLayer = data.m_Network->AddConstantLayer(groupWeights);
+ IConnectableLayer* biasLayer = data.m_Network->AddConstantLayer(groupBiases);
+ IConnectableLayer* convLayer = data.m_Network->AddConvolution2dLayer(desc);
+ convLayer->SetBackendId(setBackendConv);
+
if (!convLayer)
{
return Fail("%s: AddConvolution2dLayer failed", __func__);
}
splitterLayer->GetOutputSlot(group).Connect(convLayer->GetInputSlot(0));
+ weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1));
+ biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2));
+
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(groupWeightsInfo);
+ biasLayer->GetOutputSlot(0).SetTensorInfo(groupBiasesInfo);
convLayer->GetOutputSlot(0).SetTensorInfo(groupOutputInfo);
if(isDynamic)
@@ -1107,10 +1323,12 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
isSupported = false;
+ armnn::BackendId setBackendConcat;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConcatSupported,
data.m_Backends,
isSupported,
+ setBackendConcat,
std::vector<const TensorInfo*>(numGroups * channelMultiplier, &groupOutputInfo),
outputInfo,
concatDescriptor);
@@ -1121,6 +1339,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
IConnectableLayer* concatLayer = data.m_Network->AddConcatLayer(concatDescriptor);
+ concatLayer->SetBackendId(setBackendConcat);
if (!concatLayer)
{
return Fail("%s: AddConcatLayer failed", __func__);
@@ -1208,12 +1427,14 @@ bool ConvertInstanceNormalization(const HalOperation& operation, const HalModel&
desc.m_DataLayout = OptionalDataLayout<HalPolicy>(operation, 4, model, data);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsInstanceNormalizationSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
desc);
@@ -1234,6 +1455,7 @@ bool ConvertInstanceNormalization(const HalOperation& operation, const HalModel&
}
IConnectableLayer* layer = data.m_Network->AddInstanceNormalizationLayer(desc);
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1302,12 +1524,14 @@ bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, Con
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLogSoftmaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
descriptor);
@@ -1328,11 +1552,11 @@ bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, Con
}
IConnectableLayer* layer = data.m_Network->AddLogSoftmaxLayer(descriptor);
     if (!layer)
     {
-        return Fail("%s: AddLogSoftmaxLayer() returned nullptr", __func__);
+        return Fail("%s: Could not add the LogSoftmaxLayer", __func__);
     }
+    layer->SetBackendId(setBackend);
-
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1341,130 +1565,6 @@ bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, Con
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
-bool ConvertMaximum(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- ALOGV("HalPolicy::ConvertMaximum()");
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* outputOperand = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!outputOperand)
- {
- return Fail("%s: Could not read output", __func__);
- }
-
- const TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsMaximumSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outInfo);
- };
-
- if(IsDynamicTensor(outInfo))
- {
- isSupported = AreDynamicTensorsSupported();
- }
- else
- {
- validateFunc(outInfo, isSupported);
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- IConnectableLayer* layer = data.m_Network->AddMaximumLayer();
- assert(layer != nullptr);
- bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
-bool ConvertMinimum(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- ALOGV("HalPolicy::ConvertMinimum()");
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!output)
- {
- return Fail("%s: Could not read output 0", __func__);
- }
-
- const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsMinimumSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outputInfo);
- };
-
- if(IsDynamicTensor(outputInfo))
- {
- isSupported = AreDynamicTensorsSupported();
- }
- else
- {
- validateFunc(outputInfo, isSupported);
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- IConnectableLayer* const layer = data.m_Network->AddMinimumLayer();
- assert(layer != nullptr);
- bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
bool ConvertPadV2(const HalOperation& operation, const HalModel& model, ConversionData& data)
{
using HalOperand = typename HalPolicy::Operand;
@@ -1523,7 +1623,7 @@ bool ConvertPadV2(const HalOperation& operation, const HalModel& model, Conversi
return Fail("%s: Could not read input 2 (FLOAT32)", __func__);
}
}
- else if (operandType0 == HalOperandType::TENSOR_QUANT8_ASYMM && operandType2 == HalOperandType::INT32)
+ else if (isQuantizedOperand(operandType0) && operandType2 == HalOperandType::INT32)
{
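+        // For quantized input types the pad value is read as an INT32 scalar.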
int32_t intPadValue = 0;
if (!GetInputInt32<HalPolicy>(operation, 2, intPadValue, model, data))
@@ -1538,12 +1638,14 @@ bool ConvertPadV2(const HalOperation& operation, const HalModel& model, Conversi
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPadSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -1564,7 +1666,11 @@ bool ConvertPadV2(const HalOperation& operation, const HalModel& model, Conversi
}
IConnectableLayer* const layer = data.m_Network->AddPadLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the PadLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1599,12 +1705,14 @@ bool ConvertPrelu(const HalOperation& operation, const HalModel& model, Conversi
const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPreluSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
alphaInfo,
outputInfo);
@@ -1625,10 +1733,10 @@ bool ConvertPrelu(const HalOperation& operation, const HalModel& model, Conversi
}
IConnectableLayer* const layer = data.m_Network->AddPreluLayer();
-
     if (!layer)
     {
-        return Fail("%s: AddPreluLayer failed", __func__);
+        return Fail("%s: Could not add the PreluLayer", __func__);
     }
+    layer->SetBackendId(setBackend);
bool isReshapeSupported = BroadcastTensor(input, alpha, layer, data);
@@ -1664,12 +1772,14 @@ bool ConvertQuantize(const HalOperation& operation, const HalModel& model, Conve
const TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsQuantizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo);
};
@@ -1689,7 +1799,11 @@ bool ConvertQuantize(const HalOperation& operation, const HalModel& model, Conve
}
IConnectableLayer* const layer = data.m_Network->AddQuantizeLayer();
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the QuantizeLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1883,12 +1997,14 @@ bool ConvertQuantized16BitLstm(const HalOperation& operation, const HalModel& mo
paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo());
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsQuantizedLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
previousCellStateInInfo,
previousOutputInInfo,
@@ -1915,6 +2031,7 @@ bool ConvertQuantized16BitLstm(const HalOperation& operation, const HalModel& mo
}
IConnectableLayer* const layer = data.m_Network->AddQuantizedLstmLayer(params, "QuantizedLstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
previousCellStateIn.Connect(layer->GetInputSlot(1));
previousOutputIn.Connect(layer->GetInputSlot(2));
@@ -1987,12 +2104,14 @@ bool ConvertReduce(const HalOperation& operation,
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReduceSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -2013,7 +2132,11 @@ bool ConvertReduce(const HalOperation& operation,
}
armnn::IConnectableLayer* const layer = data.m_Network->AddReduceLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ReduceLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2136,12 +2259,14 @@ bool ConvertResize(const HalOperation& operation,
descriptor.m_HalfPixelCenters = GetOptionalBool<HalPolicy>(operation, 5, model, data);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsResizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -2162,7 +2287,11 @@ bool ConvertResize(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddResizeLayer(descriptor);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the ResizeLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2205,18 +2334,20 @@ bool ConvertSpaceToDepth(const HalOperation& operation, const HalModel& model, C
if (desc.m_BlockSize <= 1)
{
- return Fail("%s: Block size must be at least 1 in all dimensions");
+ return Fail("%s: Block size must be at least 1 in all dimensions", __func__);
}
desc.m_DataLayout = OptionalDataLayout<HalPolicy>(operation, 2, model, data);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSpaceToDepthSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -2237,7 +2368,11 @@ bool ConvertSpaceToDepth(const HalOperation& operation, const HalModel& model, C
}
IConnectableLayer* const layer = data.m_Network->AddSpaceToDepthLayer(desc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the SpaceToDepthLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2301,12 +2436,14 @@ bool ConvertSoftmax(const HalOperation& operation, const HalModel& model, Conver
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSoftmaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
desc);
@@ -2327,13 +2464,171 @@ bool ConvertSoftmax(const HalOperation& operation, const HalModel& model, Conver
}
IConnectableLayer* layer = data.m_Network->AddSoftmaxLayer(desc);
- assert(layer != nullptr);
+    if (!layer)
+    {
+        return Fail("%s: Could not add the SoftmaxLayer", __func__);
+    }
+    layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
template<typename HalPolicy,
+ typename Operation = typename HalPolicy::Operation,
+ typename Model = typename HalPolicy::Model>
+bool ConvertSplit(const Operation& operation, const Model& model, ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandType = typename HalPolicy::OperandType;
+
+ if (operation.inputs.size() != 3)
+ {
+ return Fail("%s: Optional inputs are not supported expected 3 was %i", __func__, operation.inputs.size());
+ }
+
+ // 0: An n-D tensor to split.
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ // 1: An ANEURALNETWORKS_INT32 scalar specifying the axis along which to split.
+ int32_t axis = 0;
+ // 2: An ANEURALNETWORKS_INT32 scalar indicating the number of splits along given axis.
+ // Must evenly divide axis size.
+ int32_t numSplits = 0;
+
+ if (!input.IsValid() ||
+ !GetInputScalar<HalPolicy>(operation, 1, HalOperandType::INT32, axis, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 2, HalOperandType::INT32, numSplits, model, data))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ // If number of splits is <= zero, return false.
+ if (numSplits <= 0)
+ {
+ return Fail("%s: Number of splits must be greater than zero", __func__);
+ }
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ unsigned int inputDimSize = inputInfo.GetNumDimensions();
+ int32_t inputDimensions = static_cast<int32_t>(inputDimSize);
+
+ if (axis < -inputDimensions || axis >= inputDimensions)
+ {
+ // The axis for a tensor with n dimensions must be between -n and n-1
+ // E.g. Rank 4 tensor can have axis in range [-4, 3)
+ // -1 == 3, -2 == 2, -3 == 1, -4 == 0
+ return Fail("%s: Operation has invalid axis %i. Axis must be in range [-n, n-1]", __func__, axis);
+ }
+ auto splitDim = armnnUtils::GetUnsignedAxis(inputDimSize, axis);
+
+ if (inputDimSize > MaxNumOfTensorDimensions)
+ {
+ return Fail("%s: The number of dimensions %i for split operation cannot be greater than %i",
+ __func__, inputInfo.GetNumDimensions(), MaxNumOfTensorDimensions);
+ }
+ std::vector<uint32_t> splitterDimSizes(inputDimSize);
+
+ // Add current input shape to splitterDimSizes
+ for (uint32_t i = 0; i < inputDimSize; ++i)
+ {
+ splitterDimSizes[i] = inputInfo.GetShape()[i];
+ }
+
+ if (splitterDimSizes[splitDim] % numSplits != 0)
+ {
+ return Fail("%s: The number of splits %i must evenly divide the dimension %i",
+ __func__, numSplits, splitterDimSizes[splitDim]);
+ }
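+    // Each output view covers an equal share of the split dimension.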
+ splitterDimSizes[splitDim] /= numSplits;
+
+ ViewsDescriptor descriptor(numSplits, inputDimSize);
+
+ for (int32_t i = 0; i < numSplits; ++i)
+ {
+ // Set the size of the views.
+ for (uint32_t dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
+ {
+ descriptor.SetViewSize(i, dimIdx, splitterDimSizes[dimIdx]);
+ }
+ descriptor.SetViewOriginCoord(i, splitDim, splitterDimSizes[splitDim] * i);
+ }
+
+ std::vector<TensorInfo> outputInfos;
+ for (int32_t i = 0; i < numSplits; ++i)
+ {
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, i, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output %i", __func__, i);
+ }
+
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+ outputInfos.emplace_back(outputInfo);
+ }
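+ // Gather all of the split outputs so the splitter support check can validate every view at once.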
+ std::vector<std::reference_wrapper<TensorInfo>> splitterOutputInfos(outputInfos.begin(), outputInfos.end());
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsSplitterSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ splitterOutputInfos,
+ descriptor);
+
+ if (!isSupported)
+ {
+ return Fail("%s: Layer is not supported", __func__);
+ }
+
+ for (int32_t i = 0; i < numSplits; ++i)
+ {
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, i, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output %i", __func__, i);
+ }
+
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+ if (IsDynamicTensor(outputInfo))
+ {
+ return Fail("%s: Dynamic output tensors are not supported", __func__);
+ }
+ }
+
+ IConnectableLayer* layer = data.m_Network->AddSplitterLayer(descriptor, "Split");
+ if (!layer)
+ {
+ return Fail("%s: could not add the Layer", __func__);
+ }
+ input.Connect(layer->GetInputSlot(0));
+
+ auto validateFunc = [&](const armnn::TensorInfo&, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsSplitterSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ splitterOutputInfos,
+ descriptor);
+ };
+
+ for (int32_t i = 0; i < numSplits; ++i)
+ {
+ bool ok = SetupAndTrackLayerOutputSlot<HalPolicy>(operation, i, *layer, model, data, nullptr, validateFunc);
+
+ if (!ok)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
bool ConvertLstm(const HalOperation& operation, const HalModel& model, ConversionData& data)
@@ -2474,7 +2769,7 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
// If set to 0.0 then clipping is disabled.
// 22: The clipping threshold: for the output from the projection layer, such that values are bound within
// [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
- ActivationFn activation;
+ ActivationFn activation = ActivationFn::kActivationNone;
float cellClip;
float projClip;
if (!GetInputActivationFunctionFromTensor<HalPolicy>(operation, 20, activation, model, data) ||
@@ -2688,12 +2983,14 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputStateInInfo,
cellStateInInfo,
@@ -2726,6 +3023,7 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
// Add the layer
IConnectableLayer* layer = data.m_Network->AddLstmLayer(desc, params, "Lstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
outputStateIn.Connect(layer->GetInputSlot(1));
@@ -2754,6 +3052,84 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
+bool ConvertTile(const HalOperation& operation, const HalModel& model, ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+
+ if (!output)
+ {
+ return Fail("%s: Could not read output", __func__);
+ }
+
+ const HalOperand* multiplesOperand = GetInputOperand<HalPolicy>(operation, 1, model);
+ if (!multiplesOperand)
+ {
+ return Fail("%s: Could not read input 1", __func__);
+ }
+ std::vector<int32_t> multiples;
+ if (!GetTensorInt32Values<HalPolicy>(*multiplesOperand, multiples, model, data))
+ {
+ return Fail("%s: Input 1 has invalid values", __func__);
+ }
+ // Convert the multiples from int to unsigned int,
+ // as the values are always positive even though they are stored as signed integers.
+ TileDescriptor descriptor;
+ descriptor.m_Multiples.assign(multiples.begin(), multiples.end());
+
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsTileSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputInfo,
+ descriptor);
+ };
+
+ if (IsDynamicTensor(outputInfo))
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+ else
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ IConnectableLayer* tileLayer = data.m_Network->AddTileLayer(descriptor);
+ if (!tileLayer)
+ {
+ return Fail("%s: AddTileLayer failed", __func__);
+ }
+ tileLayer->SetBackendId(setBackend);
+
+ input.Connect(tileLayer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *tileLayer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model, ConversionData& data)
{
using HalOperand = typename HalPolicy::Operand;
@@ -2925,12 +3301,14 @@ bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model
Optional<TensorInfo> biases(bias.GetInfo());
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
@@ -2953,6 +3331,7 @@ bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model
IConnectableLayer* startLayer =
data.m_Network->AddTransposeConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias));
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
return Fail("%s: AddTransposeConvolution2dLayer failed", __func__);
@@ -2964,4 +3343,450 @@ bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model
data, nullptr, validateFunc, activation);
}
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool ConvertUnidirectionalSequenceLstm(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandType = typename HalPolicy::OperandType;
+
+ ALOGV("HalPolicy::ConvertUnidirectionalSequenceLstm()");
+
+ // Determine if the input OperandType is ANEURALNETWORKS_TENSOR_FLOAT32 or ANEURALNETWORKS_TENSOR_FLOAT16
+ HalOperandType inputType;
+ if (!GetOperandType<HalPolicy>(operation, 0, model, inputType))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ // Inputs:
+ // 0: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length), “batch_size”
+ // corresponds to the batching dimension, and “input_size” is the size of the input.
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Could not read input 0: input", __func__);
+ }
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [batch_size, output_size].
+ LayerInputHandle outputStateIn = ConvertToLayerInputHandle<HalPolicy>(operation, 18, model, data);
+ if (!outputStateIn.IsValid())
+ {
+ return Fail("%s: Could not read input 18: outputStateIn", __func__);
+ }
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [batch_size, num_units].
+ LayerInputHandle cellStateIn = ConvertToLayerInputHandle<HalPolicy>(operation, 19, model, data);
+ if (!cellStateIn.IsValid())
+ {
+ return Fail("%s: Could not read input 19: cellStateIn", __func__);
+ }
+
+ // Get the mandatory input tensors:
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size].
+ const ConstTensorPin inputToForgetWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 2));
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size].
+ const ConstTensorPin inputToCellWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 3));
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size].
+ const ConstTensorPin inputToOutputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 4));
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size].
+ const ConstTensorPin recurrentToForgetWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 6));
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size].
+ const ConstTensorPin recurrentToCellWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 7));
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size].
+ const ConstTensorPin recurrentToOutputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 8));
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin forgetGateBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 13, model, data);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 14, model, data);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin outputGateBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 15, model, data);
+
+ if (!inputToForgetWeightsPin.IsValid() ||
+ !inputToCellWeightsPin.IsValid() ||
+ !inputToOutputWeightsPin.IsValid() ||
+ !recurrentToForgetWeightsPin.IsValid() ||
+ !recurrentToCellWeightsPin.IsValid() ||
+ !recurrentToOutputWeightsPin.IsValid() ||
+ !forgetGateBiasPin.IsValid() ||
+ !cellBiasPin.IsValid() ||
+ !outputGateBiasPin.IsValid())
+ {
+ return Fail("%s: Operation has invalid tensor inputs", __func__);
+ }
+
+ // Get the optional input tensors:
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ const ConstTensorPin inputToInputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 1, true));
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ const ConstTensorPin recurrentToInputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 5, true));
+ // 09: The cell-to-input weights: Optional.
+ // A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellToInputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 9, true));
+ // 10: The cell-to-forget weights: Optional.
+ // A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellToForgetWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 10, true));
+ // 11: The cell-to-output weights: Optional.
+ // A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellToOutputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 11, true));
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin inputGateBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 12,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [output_size, num_units].
+ const ConstTensorPin projectionWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 16, true));
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [output_size].
+ const ConstTensorPin projectionBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 17,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ if ((!inputToInputWeightsPin.IsValid() && !inputToInputWeightsPin.IsOptional()) ||
+ (!recurrentToInputWeightsPin.IsValid() && !recurrentToInputWeightsPin.IsOptional()) ||
+ (!cellToInputWeightsPin.IsValid() && !cellToInputWeightsPin.IsOptional()) ||
+ (!cellToForgetWeightsPin.IsValid() && !cellToForgetWeightsPin.IsOptional()) ||
+ (!cellToOutputWeightsPin.IsValid() && !cellToOutputWeightsPin.IsOptional()) ||
+ (!inputGateBiasPin.IsValid() && !inputGateBiasPin.IsOptional()) ||
+ (!projectionWeightsPin.IsValid() && !projectionWeightsPin.IsOptional()) ||
+ (!projectionBiasPin.IsValid() && !projectionBiasPin.IsOptional()))
+ {
+ return Fail("%s: Operation has invalid tensor inputs", __func__);
+ }
+
+ // Get the mandatory input scalars (actually 1-D tensors of size 1):
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ // The clipping thresholds are read as FLOAT32 or FLOAT16 scalars to match the input tensor type determined above.
+ ActivationFn activation = ActivationFn::kActivationNone;
+ LstmDescriptor desc;
+
+ if (inputType == HalOperandType::TENSOR_FLOAT32)
+ {
+ float cellClip;
+ float projClip;
+
+ if (!GetInputActivationFunctionFromTensor<HalPolicy>(operation, 20, activation, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 21, HalOperandType::FLOAT32, cellClip, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 22, HalOperandType::FLOAT32, projClip, model, data))
+ {
+ return Fail("%s: Operation has invalid scalar inputs", __func__);
+ }
+
+ desc.m_ClippingThresCell = cellClip;
+ desc.m_ClippingThresProj = projClip;
+ }
+
+ if (inputType == HalOperandType::TENSOR_FLOAT16)
+ {
+ Half cellClip;
+ Half projClip;
+
+ if (!GetInputActivationFunctionFromTensor<HalPolicy>(operation, 20, activation, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 21, HalOperandType::FLOAT16, cellClip, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 22, HalOperandType::FLOAT16, projClip, model, data))
+ {
+ return Fail("%s: Operation has invalid scalar inputs", __func__);
+ }
+
+ desc.m_ClippingThresCell = cellClip;
+ desc.m_ClippingThresProj = projClip;
+ }
+
+ // Determine if time-major or batch-major.
+ // 23: Time-major if true, batch-major if false.
+ bool isTimeMajor = GetOptionalBool<HalPolicy>(operation, 23, model, data);
+
+ // Get the normalization tensors
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ const ConstTensorPin inputLayerNormWeightsPin
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 24, true));
+
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ const ConstTensorPin forgetLayerNormWeightsPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 25,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ const ConstTensorPin cellLayerNormWeightsPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 26,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ const ConstTensorPin outputLayerNormWeightsPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 27,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // Outputs:
+ // 00: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output: ", __func__);
+ }
+
+ //
+ // 01 & 02:
+ // hiddenStateOut and cellStateOut are not currently supported by the Android HAL versions targeted by this driver.
+ //
+
+ // set the params structure for the AddLstmLayer call
+ LstmInputParams params;
+ params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr();
+ params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr();
+ params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr();
+ params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr();
+ params.m_CellToInputWeights = cellToInputWeightsPin.GetConstTensorPtr();
+ params.m_CellToForgetWeights = cellToForgetWeightsPin.GetConstTensorPtr();
+ params.m_CellToOutputWeights = cellToOutputWeightsPin.GetConstTensorPtr();
+ params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr();
+ params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr();
+ params.m_CellBias = cellBiasPin.GetConstTensorPtr();
+ params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr();
+ params.m_ProjectionWeights = projectionWeightsPin.GetConstTensorPtr();
+ params.m_ProjectionBias = projectionBiasPin.GetConstTensorPtr();
+ params.m_InputLayerNormWeights = inputLayerNormWeightsPin.GetConstTensorPtr();
+ params.m_ForgetLayerNormWeights = forgetLayerNormWeightsPin.GetConstTensorPtr();
+ params.m_CellLayerNormWeights = cellLayerNormWeightsPin.GetConstTensorPtr();
+ params.m_OutputLayerNormWeights = outputLayerNormWeightsPin.GetConstTensorPtr();
+
+ // set the layer descriptor
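+ // CIFG, peephole, projection and layer normalisation are enabled based on which optional tensors were supplied.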
+ desc.m_ActivationFunc = activation;
+ desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr ||
+ params.m_RecurrentToInputWeights == nullptr ||
+ params.m_InputGateBias == nullptr);
+ desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr ||
+ params.m_CellToOutputWeights != nullptr);
+ desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr);
+ desc.m_LayerNormEnabled = (params.m_InputLayerNormWeights != nullptr ||
+ params.m_ForgetLayerNormWeights != nullptr ||
+ params.m_CellLayerNormWeights != nullptr ||
+ params.m_OutputLayerNormWeights != nullptr);
+ desc.m_TimeMajor = isTimeMajor;
+
+ // validate the optional input groups
+ if (desc.m_CifgEnabled &&
+ (params.m_InputToInputWeights != nullptr ||
+ params.m_RecurrentToInputWeights != nullptr ||
+ params.m_InputGateBias != nullptr))
+ {
+ return Fail("%s: All, or none, of input-to-input weights, recurrent-to-input weights,"
+ " and input gate bias must be provided", __func__);
+ }
+
+ if (!desc.m_ProjectionEnabled && params.m_ProjectionBias != nullptr)
+ {
+ return Fail("%s: projection bias should not be provided without projection weights", __func__);
+ }
+
+ if (desc.m_PeepholeEnabled &&
+ (params.m_CellToForgetWeights == nullptr ||
+ params.m_CellToOutputWeights == nullptr ||
+ (!desc.m_CifgEnabled && params.m_CellToInputWeights == nullptr)))
+ {
+ return Fail("%s: All, or none, of cell-to-forget weights and cell-to-output weights must be provided"
+ " and, if CIFG is not enabled, cell-to-input weights must also be provided", __func__);
+ }
+
+ if (desc.m_LayerNormEnabled &&
+ (params.m_ForgetLayerNormWeights == nullptr ||
+ params.m_CellLayerNormWeights == nullptr ||
+ params.m_OutputLayerNormWeights == nullptr ||
+ (!desc.m_CifgEnabled && params.m_InputLayerNormWeights == nullptr)))
+ {
+ return Fail("%s: All, or none, of forget-norm weights, cell-norm weights and output-norm weights must be"
+ " provided and, if CIFG is not enabled, input-norm weights must also be provided", __func__);
+ }
+
+ // Check if the layer is supported
+ // Inputs
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputStateInInfo = outputStateIn.GetTensorInfo();
+ const TensorInfo& cellStateInInfo = cellStateIn.GetTensorInfo();
+
+ // Outputs
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ unsigned int batchSize = inputInfo.GetShape()[0];
+ unsigned int outputSize = outputInfo.GetShape()[2];
+ unsigned int numUnits = cellStateInInfo.GetShape()[1];
+
+ armnn::DataType dataType = inputInfo.GetDataType();
+ float qScale = inputInfo.GetQuantizationScale();
+ int qOffset = inputInfo.GetQuantizationOffset();
+
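+ // The state outputs are not exposed as operation outputs here, but their TensorInfos are needed for the support check below.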
+ armnn::TensorInfo cellStateOutInfo({batchSize, numUnits}, cellStateInInfo.GetDataType(),
+ cellStateInInfo.GetQuantizationScale(), cellStateInInfo.GetQuantizationOffset());
+ armnn::TensorInfo outputStateOutInfo({batchSize, outputSize}, dataType, qScale, qOffset);
+
+ // Basic parameters
+ LstmInputParamsInfo paramsInfo;
+ paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo());
+ paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo());
+ paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo());
+ paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo());
+ paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo());
+ paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo());
+ paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo());
+ paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo());
+ paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo());
+
+ // Optional parameters
+ if (!desc.m_CifgEnabled)
+ {
+ paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo());
+ paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo());
+ if (params.m_CellToInputWeights != nullptr)
+ {
+ paramsInfo.m_CellToInputWeights = &(params.m_CellToInputWeights->GetInfo());
+ }
+ paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo());
+ }
+
+ if (desc.m_ProjectionEnabled)
+ {
+ paramsInfo.m_ProjectionWeights = &(params.m_ProjectionWeights->GetInfo());
+ if (params.m_ProjectionBias != nullptr)
+ {
+ paramsInfo.m_ProjectionBias = &(params.m_ProjectionBias->GetInfo());
+ }
+ }
+
+ if (desc.m_PeepholeEnabled)
+ {
+ paramsInfo.m_CellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo());
+ paramsInfo.m_CellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo());
+ }
+
+ if (desc.m_LayerNormEnabled)
+ {
+ if (!desc.m_CifgEnabled)
+ {
+ paramsInfo.m_InputLayerNormWeights = &(params.m_InputLayerNormWeights->GetInfo());
+ }
+ paramsInfo.m_ForgetLayerNormWeights = &(params.m_ForgetLayerNormWeights->GetInfo());
+ paramsInfo.m_CellLayerNormWeights = &(params.m_CellLayerNormWeights->GetInfo());
+ paramsInfo.m_OutputLayerNormWeights = &(params.m_OutputLayerNormWeights->GetInfo());
+ }
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsUnidirectionalSequenceLstmSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputStateInInfo,
+ cellStateInInfo,
+ outputStateOutInfo,
+ cellStateOutInfo,
+ outputInfo,
+ desc,
+ paramsInfo);
+ };
+
+ bool isDynamic = false;
+ if (!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isDynamic = true;
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ // Add the layer
+ IConnectableLayer* layer = data.m_Network->AddUnidirectionalSequenceLstmLayer(desc,
+ params,
+ "UnidirectionalSequenceLstm");
+ if (!layer)
+ {
+ return Fail("%s: Could not add the UnidirectionalSequenceLstmLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
+
+ input.Connect(layer->GetInputSlot(0));
+ outputStateIn.Connect(layer->GetInputSlot(1));
+ cellStateIn.Connect(layer->GetInputSlot(2));
+
+ if (!isDynamic)
+ {
+ return (SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, 2, model, data));
+ }
+ else
+ {
+ return (SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, 2, model, data, nullptr,
+ validateFunc, ActivationFn::kActivationNone, true));
+ }
+}
+
} // armnn_driver namespace
\ No newline at end of file
diff --git a/ConversionUtils_1_3.hpp b/ConversionUtils_1_3.hpp
index 150735e9..761b1899 100644
--- a/ConversionUtils_1_3.hpp
+++ b/ConversionUtils_1_3.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -131,10 +131,12 @@ bool ConvertFill(const HalOperation& operation, const HalModel& model, Conversio
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsFillSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -144,7 +146,11 @@ bool ConvertFill(const HalOperation& operation, const HalModel& model, Conversio
}
IConnectableLayer* const layer = data.m_Network->AddFillLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the FillLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data);
@@ -184,13 +190,14 @@ bool ConvertLogicalBinary(const HalOperation& operation,
LogicalBinaryDescriptor descriptor(logicalOperation);
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLogicalBinarySupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo0,
inputInfo1,
outputInfo,
@@ -212,7 +219,11 @@ bool ConvertLogicalBinary(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddLogicalBinaryLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the LogicalBinaryLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
if (!isReshapeSupported)
@@ -674,12 +685,14 @@ bool ConvertQuantizedLstm(const HalOperation& operation, const HalModel& model,
// Check if the layer is supported
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& cellStateOutInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsQLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputStatePrevTimeStepInfo,
cellStatePrevTimeStepInfo,
@@ -710,6 +723,7 @@ bool ConvertQuantizedLstm(const HalOperation& operation, const HalModel& model,
// Add the layer
IConnectableLayer* layer = data.m_Network->AddQLstmLayer(desc, params, "QLstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
outputStatePrevTimeStep.Connect(layer->GetInputSlot(1));
@@ -764,10 +778,12 @@ bool ConvertRank(const HalOperation& operation, const HalModel& model, Conversio
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsRankSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outInfo);
if (!isSupported)
@@ -776,7 +792,11 @@ bool ConvertRank(const HalOperation& operation, const HalModel& model, Conversio
}
armnn::IConnectableLayer* layer = data.m_Network->AddRankLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the RankLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, &outInfo);
diff --git a/DriverOptions.cpp b/DriverOptions.cpp
index 42f7ea9e..2998917f 100644
--- a/DriverOptions.cpp
+++ b/DriverOptions.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -17,7 +17,6 @@
#include <cxxopts/cxxopts.hpp>
#include <algorithm>
-#include <cassert>
#include <functional>
#include <string>
#include <sstream>
@@ -39,6 +38,10 @@ DriverOptions::DriverOptions(armnn::Compute computeDevice, bool fp16Enabled)
, m_ShouldExit(false)
, m_SaveCachedNetwork(false)
, m_NumberOfThreads(0)
+ , m_EnableAsyncModelExecution(false)
+ , m_ArmnnNumberOfThreads(1)
+ , m_EnableImport(false)
+ , m_EnableExport(false)
{
}
@@ -53,6 +56,10 @@ DriverOptions::DriverOptions(const std::vector<armnn::BackendId>& backends, bool
, m_ShouldExit(false)
, m_SaveCachedNetwork(false)
, m_NumberOfThreads(0)
+ , m_EnableAsyncModelExecution(false)
+ , m_ArmnnNumberOfThreads(1)
+ , m_EnableImport(false)
+ , m_EnableExport(false)
{
}
@@ -66,6 +73,10 @@ DriverOptions::DriverOptions(int argc, char** argv)
, m_ShouldExit(false)
, m_SaveCachedNetwork(false)
, m_NumberOfThreads(0)
+ , m_EnableAsyncModelExecution(false)
+ , m_ArmnnNumberOfThreads(1)
+ , m_EnableImport(false)
+ , m_EnableExport(false)
{
std::string unsupportedOperationsAsString;
std::string clTunedParametersModeAsString;
@@ -154,7 +165,22 @@ DriverOptions::DriverOptions(int argc, char** argv)
cxxopts::value<bool>(m_VerboseLogging)->default_value("false"))
("V,version", "Show version information",
- cxxopts::value<bool>(showVersion)->default_value("false"));
+ cxxopts::value<bool>(showVersion)->default_value("false"))
+
+ ("A,asyncModelExecution", "Enable AsynModel Execution",
+ cxxopts::value<bool>(m_EnableAsyncModelExecution)->default_value("false"))
+
+ ("T,armnn-threads",
+ "Assign the number of threads used by ArmNN. "
+ "Input value must be at least 1. "
+ "Default is set to 1.",
+ cxxopts::value<unsigned int>(m_ArmnnNumberOfThreads)->default_value("1"))
+
+ ("I,enableImport", "Enable Importing of input buffers",
+ cxxopts::value<bool>(m_EnableImport)->default_value("false"))
+
+ ("E,enableExport", "Enable Exporting of output buffers",
+ cxxopts::value<bool>(m_EnableExport)->default_value("false"));
}
catch (const std::exception& e)
{
@@ -168,7 +194,7 @@ DriverOptions::DriverOptions(int argc, char** argv)
{
cxxopts::ParseResult result = optionsDesc.parse(argc, argv);
}
- catch (const cxxopts::OptionException& e)
+ catch (const cxxopts::exceptions::exception& e)
{
ALOGW("An exception occurred attempting to parse program options: %s", e.what());
std::cout << optionsDesc.help() << std::endl
diff --git a/DriverOptions.hpp b/DriverOptions.hpp
index 8b3f5743..ee68a945 100644
--- a/DriverOptions.hpp
+++ b/DriverOptions.hpp
@@ -40,6 +40,10 @@ public:
const std::string& GetCachedNetworkFilePath() const { return m_CachedNetworkFilePath; }
bool SaveCachedNetwork() const { return m_SaveCachedNetwork; }
unsigned int GetNumberOfThreads() const { return m_NumberOfThreads; }
+ bool isAsyncModelExecutionEnabled() const { return m_EnableAsyncModelExecution; };
+ unsigned int getNoOfArmnnThreads() const { return m_ArmnnNumberOfThreads; };
+ bool isImportEnabled() const { return m_EnableImport; };
+ bool isExportEnabled() const { return m_EnableExport; };
private:
std::vector<armnn::BackendId> m_Backends;
@@ -59,6 +63,10 @@ private:
std::string m_CachedNetworkFilePath;
bool m_SaveCachedNetwork;
unsigned int m_NumberOfThreads;
+ bool m_EnableAsyncModelExecution;
+ unsigned int m_ArmnnNumberOfThreads;
+ bool m_EnableImport;
+ bool m_EnableExport;
};
} // namespace armnn_driver
diff --git a/LICENSE.spdx b/LICENSE.spdx
new file mode 100644
index 00000000..985e90f5
--- /dev/null
+++ b/LICENSE.spdx
@@ -0,0 +1,756 @@
+SPDXVersion: SPDX-2.1
+DataLicense: CC0-1.0
+SPDXID: SPDXRef-DOCUMENT
+DocumentName: android-nn-driver
+DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-52f0ad2d-5aaf-4639-824f-433fd3f75059
+Creator: Person: Anonymous ()
+Creator: Organization: Anonymous ()
+Creator: Tool: reuse-1.0.0
+Created: 2022-08-03T11:15:19Z
+CreatorComment: <text>This document was created automatically using available reuse information consistent with REUSE.</text>
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f8259108df712c403827fb71c15e65a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-a43f0edf0ce5df76c5fa0b729273ea77
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-bf1cf5f58ec86ae7e11a3c3c8b474034
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-cecaaa5d4dda56381f9f7a9cd61ff7e9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-f143870cf1668a0e1921bc40fdb66f54
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-00a23dbae66843f432e28118661839d4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3256f8866910230cf6a7a73811af0cf9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2cc451461228edab191f273bf18b1bb3
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-d98af2dbf0e85ae4bc5f7b43ea7a954a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-c88a629746090e9a0d3827215e2bfd51
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-cae6fc1961249d6881aca965eb58e004
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e46a1911e4d9059307bd22dc2d912cb1
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ea85c3000a503cbd9bd1b1f84e9fad86
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-4a1090b46195ace5630ea4a007850de7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-385bcf1658d8867efb2b32b1500f09d4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b521d1dfe6989e64400469fdebd831c9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8bc8b64290ab46c0fb3219b9ec81a869
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8463e614db8ba40e6e1945c7b4b18295
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b829f130ef058df6a26066043a87a7f2
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-df339a6ebf0b4933a48663cccf1cf20a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b850f00ee638309c5b8d15037fac6df6
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ca9a486e79c5c263b3d3f536a4565d83
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-67139f126de1f029b3bd651cd63b1bd6
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-be18403f7bc7bf15c37c7f64298b3c3d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f5b20dfa312b6e2f9a8e1bf2b0b3503
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b825919ce48fdb7516cbc362e6393f29
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-edaddcd2dd335d3ece1304a6554ba51d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-7856d093259efa6362ed229b843e3bf4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-965719fd5d318ab9d90f4ef97d6e5b0f
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-531050aa1791fd697a5dc40c9a177dd4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-6776eb54bb2ecc6d49e627dbe41622b7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b6f8185ee2082a086ab3c079d6b39244
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8507d62cd47bf366b75f14567d47267e
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ccf8d55a9e9058f0dcf7c363c703d32d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-092945a8bc4261b051b787bd591a7714
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-655f5885a812698027dcabea1589d5f4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-649cb81767be10c5da180796b9ddb4a7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f7e9dd3a20f1713c3fdc385273c66fa
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-f32963290737441c8d622ba4d084b5c8
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-567e8339bfeba1cb3c6421c08837b35c
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-03ea74a5d4f4f43725bd95894fdec8dd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8a7249a37de6af18cd2d550cff6010f9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-c71eb7532243e3ca46e017d669533dba
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-22c04f1ed16ebce9712667a1c38a372a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e11be25fc4287567b7a0fa7ac82f1bb9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-1dcf36d546829f1e3054e855e00ba041
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b712c2b9f228f16280dd638e9509a61d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e9e436995bae1a91cc8eca98e9b807fd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-07efeb8174fbf736a199dc31e12c5e72
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-37f8670d1beb81082869bc84b163efab
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-342850c8fa79d64844c1094ab1610c1c
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-147dd65486ab91d9c3c92070f0bfdeb0
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-de4084667ef7b12bc58e12fdd2191c69
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b2047de27559c407bef406aff2b40d94
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f4bb06fba519fb76f51f8943f6ebae9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-30b64008afa71760eabc7da521c1889d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-baf669b9e92e00c5362dbc82809d5bde
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-930a2fdcf68b3d60760bcd4b005da53a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ddc27b0006a37f1b787e8b6162daf3d8
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-37595def2f9768995c35e05cb7997d5b
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-815280de5c96988091018b623655b38f
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-d2adb23643a916d18c659c84820d7c6a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-29371c36e8e4df94cfddca2fc1d126ba
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-cd71ee2ca480910d96b8b0d8d11f7ad0
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e66a2501ad56e8b4c34ec503ea107499
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-f0b928f9591fbb77b0fd4f581e328118
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ec80697484956fb568cffa59d1176c99
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3ee20d00476f82587311b7af6755ba87
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-a2c1c0f69d8cae0e305c6588f5a1b7f1
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-a25ef944bedd8c0ebed511a2acf49ec5
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-6efb1f07d269b9e0980b06617c8a715b
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3f448b727744f3cb9b0779c05699e288
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-dde9ae64190be94ab573edb16facee76
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-740f541502a5a7bdef15c23c22f921dd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-815f4a4f014436097f35a2933a78c782
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-141d09b3dcfb086fa13ed4d1d7dc72c7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b608c8e94bd050f57df20a7b3c3578f7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9af5a132c2718f7f9cd37780855d5dc7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-835e7b213035e712703efb1919160385
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3ac1d54ace29e86b09ef0f7040ce499a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-d4683c7840fdbbe5db1883d20f39e36f
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-106e0283ae070f56090f2d8df4aa23bf
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-390a1372ba683ef9a6008dacad3d7189
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-7ce28875cc79ab21c78d42ecdb2ba887
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9ea405115778d8bc6828cb805c59d965
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-4c471c4784291b124a329da98dbb5721
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-5aa36b3d6a82f93da9cc87f793dd596d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2994f5b9a6418a8785a66cb0c974b6fd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2cd09106f5468654db71e97033eda634
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-233ae563722a5bc6f6fd82d5ca664e9b
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2d49afc0410844dd4d5fb753d0d2c4d8
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-05f43bb75f39ece251d8a20052bcfd61
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-94799b14d1b2f5eb08bf3204677984f1
+
+FileName: ./.gitignore
+SPDXID: SPDXRef-9f8259108df712c403827fb71c15e65a
+FileChecksum: SHA1: 20e5140c5f3d7d2bb9447e20e86fc36899042ebc
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./1.0/ArmnnDriver.hpp
+SPDXID: SPDXRef-a43f0edf0ce5df76c5fa0b729273ea77
+FileChecksum: SHA1: a8f69427c8291cd8237349d637a4e8be518109c0
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-bf1cf5f58ec86ae7e11a3c3c8b474034
+FileChecksum: SHA1: f59f11e39817ddf3707b0ac10e512357b4c453dc
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-cecaaa5d4dda56381f9f7a9cd61ff7e9
+FileChecksum: SHA1: 9da9087071f65ca982a6107f2459cbd25e90f30b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/FullyConnected.hpp
+SPDXID: SPDXRef-f143870cf1668a0e1921bc40fdb66f54
+FileChecksum: SHA1: 637f01696648b3d45ff7fa923bdf9d1dc69d28af
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/HalPolicy.cpp
+SPDXID: SPDXRef-00a23dbae66843f432e28118661839d4
+FileChecksum: SHA1: 91f1c7cadf4a257ff227a828a399be98a418bbbe
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./1.0/HalPolicy.hpp
+SPDXID: SPDXRef-3256f8866910230cf6a7a73811af0cf9
+FileChecksum: SHA1: dcb315bcb1d3f3da0a9c3c0613b88927f478549f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/ArmnnDriver.hpp
+SPDXID: SPDXRef-2cc451461228edab191f273bf18b1bb3
+FileChecksum: SHA1: 77f4cf04748296e452f8af5d3b039122fdd128d7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-d98af2dbf0e85ae4bc5f7b43ea7a954a
+FileChecksum: SHA1: 0f1fd3966716c0b12250d126f9e81e7c4255250e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-c88a629746090e9a0d3827215e2bfd51
+FileChecksum: SHA1: e3d2b504055de7abe3bf788e90f575dd1025bd85
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/HalPolicy.cpp
+SPDXID: SPDXRef-cae6fc1961249d6881aca965eb58e004
+FileChecksum: SHA1: 3ce067043eaeb91850116ca6d96bf2cc157e0b25
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/HalPolicy.hpp
+SPDXID: SPDXRef-e46a1911e4d9059307bd22dc2d912cb1
+FileChecksum: SHA1: 9622c84ff7b21205560a852f6937c9f6294726fe
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/ArmnnDriver.hpp
+SPDXID: SPDXRef-ea85c3000a503cbd9bd1b1f84e9fad86
+FileChecksum: SHA1: a3139ec9145218e6135ba01a28915b7eaa978030
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-4a1090b46195ace5630ea4a007850de7
+FileChecksum: SHA1: 5314d9584fde5478496ae4a5e21dae2e4cff9b30
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-385bcf1658d8867efb2b32b1500f09d4
+FileChecksum: SHA1: da57c0309a90ccccc2548a5f5980646e8034fbc8
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/HalPolicy.cpp
+SPDXID: SPDXRef-b521d1dfe6989e64400469fdebd831c9
+FileChecksum: SHA1: 3c36a47c20cbccaff4ab066cdd70415588e7b2dc
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/HalPolicy.hpp
+SPDXID: SPDXRef-8bc8b64290ab46c0fb3219b9ec81a869
+FileChecksum: SHA1: a5f76412d0382b3a1dfa1cd660d6bcaf3611005c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/ArmnnDriver.hpp
+SPDXID: SPDXRef-8463e614db8ba40e6e1945c7b4b18295
+FileChecksum: SHA1: 2df533c72232ee6dd03485e24ea09f39d11936ab
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-b829f130ef058df6a26066043a87a7f2
+FileChecksum: SHA1: b2fc505c82f7818ac3c1724d9793c74f64499b4c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-df339a6ebf0b4933a48663cccf1cf20a
+FileChecksum: SHA1: cfc89a2aeb1019f6073a9608284874bf7f5fad1b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/HalPolicy.cpp
+SPDXID: SPDXRef-b850f00ee638309c5b8d15037fac6df6
+FileChecksum: SHA1: d40e4d1c8d2ddae7b405049a6c2925bf6920b066
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/HalPolicy.hpp
+SPDXID: SPDXRef-ca9a486e79c5c263b3d3f536a4565d83
+FileChecksum: SHA1: d5e11e86d3e7e9b7c411137d02b177d47878af65
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./Android.bp
+SPDXID: SPDXRef-67139f126de1f029b3bd651cd63b1bd6
+FileChecksum: SHA1: b6eee11f4b5dec7d0edc76cc6a7f3096c0626aeb
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 ARM Ltd. All rights reserved.</text>
+
+FileName: ./Android.mk
+SPDXID: SPDXRef-be18403f7bc7bf15c37c7f64298b3c3d
+FileChecksum: SHA1: d2c012577d70974378211d3ee8ea290a61b52fa7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2022 ARM Ltd. and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnDevice.cpp
+SPDXID: SPDXRef-9f5b20dfa312b6e2f9a8e1bf2b0b3503
+FileChecksum: SHA1: 5164605fe75cbebe13c4ae3c026789212710f4db
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDevice.hpp
+SPDXID: SPDXRef-b825919ce48fdb7516cbc362e6393f29
+FileChecksum: SHA1: 6e49e8fe37b39ca1381a403e227c664cb5c0f07e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDriver.hpp
+SPDXID: SPDXRef-edaddcd2dd335d3ece1304a6554ba51d
+FileChecksum: SHA1: 537df248b2cd55405b3df65d3808c3ef41fd0aa1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-7856d093259efa6362ed229b843e3bf4
+FileChecksum: SHA1: 36395b80efda8d6dce9a9e5fe4e9f45083eedf36
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-965719fd5d318ab9d90f4ef97d6e5b0f
+FileChecksum: SHA1: 4e36f763d315c5db72d0cb0b4477411800abbd96
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel.cpp
+SPDXID: SPDXRef-531050aa1791fd697a5dc40c9a177dd4
+FileChecksum: SHA1: 440468c755b83402a9f308a7fc1e9bc860077c08
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel.hpp
+SPDXID: SPDXRef-6776eb54bb2ecc6d49e627dbe41622b7
+FileChecksum: SHA1: ba7af89983699d749ab6305415b7dcaba2b2c981
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_2.cpp
+SPDXID: SPDXRef-b6f8185ee2082a086ab3c079d6b39244
+FileChecksum: SHA1: e4d46acd9483cb285bc681a74c2f399593ea7f6c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_2.hpp
+SPDXID: SPDXRef-8507d62cd47bf366b75f14567d47267e
+FileChecksum: SHA1: 742b7ac96b7c7e96ec8b956214dd3211098d70b8
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_3.cpp
+SPDXID: SPDXRef-ccf8d55a9e9058f0dcf7c363c703d32d
+FileChecksum: SHA1: c70810b740e24cff02d7caf4f7f8aa5aab37cfb6
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_3.hpp
+SPDXID: SPDXRef-092945a8bc4261b051b787bd591a7714
+FileChecksum: SHA1: 7b3b0fdfb2bb3fc4d0e5764171a3549a8c8978bd
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./CacheDataHandler.cpp
+SPDXID: SPDXRef-655f5885a812698027dcabea1589d5f4
+FileChecksum: SHA1: 445629336054df90c9099b9b75797ed6811dd779
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2021 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./CacheDataHandler.hpp
+SPDXID: SPDXRef-649cb81767be10c5da180796b9ddb4a7
+FileChecksum: SHA1: 26ae481a852dd13093a1c9edf25ba183f2d35fc5
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2021 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils.cpp
+SPDXID: SPDXRef-9f7e9dd3a20f1713c3fdc385273c66fa
+FileChecksum: SHA1: 0c8905ac2bb01f0be67c7ebe32d50554d88864a2
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils.hpp
+SPDXID: SPDXRef-f32963290737441c8d622ba4d084b5c8
+FileChecksum: SHA1: 8084c3cbe39122af1061c98cfd2872bbb363f281
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils_1_2.hpp
+SPDXID: SPDXRef-567e8339bfeba1cb3c6421c08837b35c
+FileChecksum: SHA1: bff1d8c18281b0d2a77b33a92f67acfb321c4a87
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils_1_3.hpp
+SPDXID: SPDXRef-03ea74a5d4f4f43725bd95894fdec8dd
+FileChecksum: SHA1: af14ef930e27aae57ae6f0cb7031b3fac5eb3009
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./DriverOptions.cpp
+SPDXID: SPDXRef-8a7249a37de6af18cd2d550cff6010f9
+FileChecksum: SHA1: d141aea6e242783c111d0a6d37e619fa7397d10d
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./DriverOptions.hpp
+SPDXID: SPDXRef-c71eb7532243e3ca46e017d669533dba
+FileChecksum: SHA1: 9684228ad74e75018325e92902a4d3dc871ef6a8
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ModelToINetworkConverter.cpp
+SPDXID: SPDXRef-22c04f1ed16ebce9712667a1c38a372a
+FileChecksum: SHA1: abede3be1ee6b689512d4720f9a64bea307aeb6f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ModelToINetworkConverter.hpp
+SPDXID: SPDXRef-e11be25fc4287567b7a0fa7ac82f1bb9
+FileChecksum: SHA1: 14a9c3b1b8196f05d82f514a584e5c57cf3f0f99
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./NnapiSupport.txt
+SPDXID: SPDXRef-1dcf36d546829f1e3054e855e00ba041
+FileChecksum: SHA1: 821af6773ff138f88246e2154b80c4c9ec8412d9
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./README.md
+SPDXID: SPDXRef-b712c2b9f228f16280dd638e9509a61d
+FileChecksum: SHA1: af3dcb14667d2fd936c87704d362704f18224c76
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018-2021 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./RequestThread.cpp
+SPDXID: SPDXRef-e9e436995bae1a91cc8eca98e9b807fd
+FileChecksum: SHA1: cb92a755b3081985429649b6a4baf5930d869b82
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./RequestThread.hpp
+SPDXID: SPDXRef-07efeb8174fbf736a199dc31e12c5e72
+FileChecksum: SHA1: 48c759a5bf72b34d9f8b4fb2b7e9722e923561d9
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./RequestThread_1_3.cpp
+SPDXID: SPDXRef-37f8670d1beb81082869bc84b163efab
+FileChecksum: SHA1: a1c383202dc7fea2c6a78d0065aeb2edf33eda10
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./RequestThread_1_3.hpp
+SPDXID: SPDXRef-342850c8fa79d64844c1094ab1610c1c
+FileChecksum: SHA1: f49a15fca5bfb1bdf4e5e4718ed01b3124047894
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./SECURITY.md
+SPDXID: SPDXRef-147dd65486ab91d9c3c92070f0bfdeb0
+FileChecksum: SHA1: 5c7051d2c36aa1671d35aa1ef5f830be26929c47
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./SystemPropertiesUtils.hpp
+SPDXID: SPDXRef-de4084667ef7b12bc58e12fdd2191c69
+FileChecksum: SHA1: 6ad8daffbc795aad50bd00f316b40565323eeae1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./Utils.cpp
+SPDXID: SPDXRef-b2047de27559c407bef406aff2b40d94
+FileChecksum: SHA1: 76c8badff7865de215abe395e79e8a059a97ccb5
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./Utils.hpp
+SPDXID: SPDXRef-9f4bb06fba519fb76f51f8943f6ebae9
+FileChecksum: SHA1: 615c59ebc06d526534dadab7e4a9589a22933de1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./android-nn-driver.IVGCVSW-7090.patch
+SPDXID: SPDXRef-30b64008afa71760eabc7da521c1889d
+FileChecksum: SHA1: 35fbf73bed3f1eb2cad0de08475b88853cc918d7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright (c) <year> <copyright holders>
+Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved.
+Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.0-service-armnn.rc
+SPDXID: SPDXRef-baf669b9e92e00c5362dbc82809d5bde
+FileChecksum: SHA1: 8fa18b080109a4e60839b5d1d635af28e9eef847
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.1-service-armnn.rc
+SPDXID: SPDXRef-930a2fdcf68b3d60760bcd4b005da53a
+FileChecksum: SHA1: 7f085302dd79d18f958c0779d1918871e55f816a
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.2-service-armnn.rc
+SPDXID: SPDXRef-ddc27b0006a37f1b787e8b6162daf3d8
+FileChecksum: SHA1: 14f30ecab9ddab0ca1589c140f7104835e83025f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2019-2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.3-service-armnn.rc
+SPDXID: SPDXRef-37595def2f9768995c35e05cb7997d5b
+FileChecksum: SHA1: 759eff0251977e333141d4383ff08bb41debd395
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./docs/FAQ.md
+SPDXID: SPDXRef-815280de5c96988091018b623655b38f
+FileChecksum: SHA1: ed059961071be063db41ea7378030e9279256725
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./docs/IntegratorGuide.md
+SPDXID: SPDXRef-d2adb23643a916d18c659c84820d7c6a
+FileChecksum: SHA1: 16ce5aa116603d2a009b209a91b57d73271d2c0b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./service.cpp
+SPDXID: SPDXRef-29371c36e8e4df94cfddca2fc1d126ba
+FileChecksum: SHA1: 763c8edce64123fcb36c182acbdaa8a7d1b44782
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./setup.sh
+SPDXID: SPDXRef-cd71ee2ca480910d96b8b0d8d11f7ad0
+FileChecksum: SHA1: 531cfa9f242d7c718e8d07f56564c9782a046d4f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2020-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.0/Convolution2D.cpp
+SPDXID: SPDXRef-e66a2501ad56e8b4c34ec503ea107499
+FileChecksum: SHA1: 128550351b04d32459edd674f9c9d669f57f1b06
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.0/FullyConnectedReshape.cpp
+SPDXID: SPDXRef-f0b928f9591fbb77b0fd4f581e328118
+FileChecksum: SHA1: 1ae9abf76553388bc2e6245fed9b8a65a8f62706
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.0/Lstm.cpp
+SPDXID: SPDXRef-ec80697484956fb568cffa59d1176c99
+FileChecksum: SHA1: 5727d5f855f102a6b6704847adf4944b28f48aef
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Convolution2D.cpp
+SPDXID: SPDXRef-3ee20d00476f82587311b7af6755ba87
+FileChecksum: SHA1: 03db94ba9ca8bd50266e46b9ad771ac776083414
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Lstm.cpp
+SPDXID: SPDXRef-a2c1c0f69d8cae0e305c6588f5a1b7f1
+FileChecksum: SHA1: bd8899a7ce02d366cde07b1bc09548e841b0289e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Mean.cpp
+SPDXID: SPDXRef-a25ef944bedd8c0ebed511a2acf49ec5
+FileChecksum: SHA1: 55a55089a6b6a1dfadb25254caf0c1c00464e81b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Transpose.cpp
+SPDXID: SPDXRef-6efb1f07d269b9e0980b06617c8a715b
+FileChecksum: SHA1: 5ff1b93fad61526f720be64cdb1db99cde3f3867
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/Capabilities.cpp
+SPDXID: SPDXRef-3f448b727744f3cb9b0779c05699e288
+FileChecksum: SHA1: af172eb6b3911c53764662e84ba4b54948566951
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/Dilation.cpp
+SPDXID: SPDXRef-dde9ae64190be94ab573edb16facee76
+FileChecksum: SHA1: a42f16142851e48f6c25ba62dfb59b85cbbd707c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/Lstm.cpp
+SPDXID: SPDXRef-740f541502a5a7bdef15c23c22f921dd
+FileChecksum: SHA1: 417055ce2712481721b4610e0794ff45c4fad5e0
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/UnidirectionalSequenceLstm.cpp
+SPDXID: SPDXRef-815f4a4f014436097f35a2933a78c782
+FileChecksum: SHA1: ef80888be3ffee282501ae7b24112c1fe5aef530
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.3/QLstm.cpp
+SPDXID: SPDXRef-141d09b3dcfb086fa13ed4d1d7dc72c7
+FileChecksum: SHA1: b153faa2047b69fa50c61d8b571ae6c56f30a6c1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.3/QosTests.cpp
+SPDXID: SPDXRef-b608c8e94bd050f57df20a7b3c3578f7
+FileChecksum: SHA1: 581cd7da71d8c437eec87f7a6883639cf03c87d3
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Android.mk
+SPDXID: SPDXRef-9af5a132c2718f7f9cd37780855d5dc7
+FileChecksum: SHA1: 39ed4f7eb099c95e3b322ab33884addd8e8679a1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 ARM Ltd. All rights reserved.</text>
+
+FileName: ./test/Concat.cpp
+SPDXID: SPDXRef-835e7b213035e712703efb1919160385
+FileChecksum: SHA1: bf25dc393b3886450e887e3f00f1866be2ec6a91
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Concurrent.cpp
+SPDXID: SPDXRef-3ac1d54ace29e86b09ef0f7040ce499a
+FileChecksum: SHA1: f67c8bbb69c38ce9fc29e89780f183be49e9c7b7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Convolution2D.hpp
+SPDXID: SPDXRef-d4683c7840fdbbe5db1883d20f39e36f
+FileChecksum: SHA1: b4bf0e39e5b1e754dbbb62d22ee8010edf14ed5e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Dilation.hpp
+SPDXID: SPDXRef-106e0283ae070f56090f2d8df4aa23bf
+FileChecksum: SHA1: 065c5e5c9dde792785f7f58eb1498f216efb8cf1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/DriverTestHelpers.cpp
+SPDXID: SPDXRef-390a1372ba683ef9a6008dacad3d7189
+FileChecksum: SHA1: baf7aa187cfa11c4b3a69a00a69262e5ced445bf
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/DriverTestHelpers.hpp
+SPDXID: SPDXRef-7ce28875cc79ab21c78d42ecdb2ba887
+FileChecksum: SHA1: fa779ca557ae7bebcfefecb1faded7ac783f5924
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/FullyConnected.cpp
+SPDXID: SPDXRef-9ea405115778d8bc6828cb805c59d965
+FileChecksum: SHA1: 4a21a85071eaaac20d5705b1bceae1219bff607f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/GenericLayerTests.cpp
+SPDXID: SPDXRef-4c471c4784291b124a329da98dbb5721
+FileChecksum: SHA1: 0587067149b5065ef082a788ca5194e88653ec36
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Lstm.hpp
+SPDXID: SPDXRef-5aa36b3d6a82f93da9cc87f793dd596d
+FileChecksum: SHA1: 193309453c30280b2fbc96cf756d8b03e080b856
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/SystemProperties.cpp
+SPDXID: SPDXRef-2994f5b9a6418a8785a66cb0c974b6fd
+FileChecksum: SHA1: 34cc21f95a76e864d0d77e68bd374120ba50fd9d
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/TestTensor.cpp
+SPDXID: SPDXRef-2cd09106f5468654db71e97033eda634
+FileChecksum: SHA1: c2869d7fba41a2c04af6d3ee8813c28b66c3fdaf
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/TestTensor.hpp
+SPDXID: SPDXRef-233ae563722a5bc6f6fd82d5ca664e9b
+FileChecksum: SHA1: cf3e2de34c3b526aadba3fb64381f7b520c21c03
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Tests.cpp
+SPDXID: SPDXRef-2d49afc0410844dd4d5fb753d0d2c4d8
+FileChecksum: SHA1: 90784980509b0d9b5cb27dc7486dae0c3f5818a2
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/UnidirectionalSequenceLstm.hpp
+SPDXID: SPDXRef-05f43bb75f39ece251d8a20052bcfd61
+FileChecksum: SHA1: e6eeadc76e06482371803a213b2ad9582456afac
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/UtilsTests.cpp
+SPDXID: SPDXRef-94799b14d1b2f5eb08bf3204677984f1
+FileChecksum: SHA1: 42157f2abe26fd24b755df1695670408749327a6
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt
new file mode 100644
index 00000000..2071b23b
--- /dev/null
+++ b/LICENSES/MIT.txt
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) <year> <copyright holders>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp
index 41716b0f..2dc302ed 100644
--- a/ModelToINetworkConverter.cpp
+++ b/ModelToINetworkConverter.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -35,7 +35,6 @@ ModelToINetworkConverter<HalPolicy>::ModelToINetworkConverter(const std::vector<
{
m_ConversionResult = ConversionResult::UnsupportedFeature;
ALOGE("%s: Unexpected exception: %s", __func__, e.what());
- assert(false);
}
}
@@ -95,7 +94,6 @@ void ModelToINetworkConverter<HalPolicy>::Convert()
ALOGV("ModelToINetworkConverter::Convert(): getMainModel(m_Model).operands[inputIndex];");
const HalOperand& operand = getMainModel(m_Model).operands[inputIndex];
ALOGV("ModelToINetworkConverter::Convert(): GetTensorInfoForOperand(operand)");
- const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand);
const std::string layerName = "Input_" + std::to_string(i);
ALOGV("ModelToINetworkConverter::Convert(): m_Data.m_Network->AddInputLayer(i, layerName.c_str())");
armnn::IConnectableLayer* layer = m_Data.m_Network->AddInputLayer(i, layerName.c_str());
@@ -192,12 +190,15 @@ void ModelToINetworkConverter<HalPolicy>::Convert()
{
// outputs in android nn are represented by operands
uint32_t outputIndex = getMainModel(m_Model).outputIndexes[i];
- const HalOperand& operand = getMainModel(m_Model).operands[outputIndex];
- const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand);
const std::string layerName = "Output_" + std::to_string(i);
armnn::IConnectableLayer* layer = m_Data.m_Network->AddOutputLayer(i, layerName.c_str());
- assert(m_Data.m_OutputSlotForOperand[outputIndex]);
+ if (!m_Data.m_OutputSlotForOperand[outputIndex])
+ {
+ Fail("%s: OutputSlot %i does not exist", __func__, outputIndex);
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ break;
+ }
m_Data.m_OutputSlotForOperand[outputIndex]->Connect(layer->GetInputSlot(0));
}
}
@@ -213,7 +214,10 @@ template<typename HalPolicy>
bool ModelToINetworkConverter<HalPolicy>::IsOperationSupported(uint32_t operationIndex) const
{
std::map<uint32_t, bool>::const_iterator it = m_OperationSupported.find(operationIndex);
- assert(it != m_OperationSupported.end());
+ if (it == m_OperationSupported.end())
+ {
+ return Fail("%s: Unrecognised Operation Index: %i", __func__, operationIndex);
+ }
return it->second;
}
diff --git a/NnapiSupport.txt b/NnapiSupport.txt
index e0400e1f..545e7c76 100644
--- a/NnapiSupport.txt
+++ b/NnapiSupport.txt
@@ -14,76 +14,90 @@ For integration and usage documentation, please see README.md.
The following AndroidNN HAL 1.0, 1.1, 1.2 and 1.3 operations are currently supported:
AndroidNN operator Tensor type supported
-ABS (FLOAT32)
-ADD (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-ARGMAX (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-ARGMIN (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-AVERAGE_POOL_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-BATCH_TO_SPACE_ND (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+ABS (FLOAT32, FLOAT16, INT32)
+ADD (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+ARGMAX (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+ARGMIN (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+AVERAGE_POOL_2D (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+BATCH_TO_SPACE_ND (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+CAST (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM)
+CHANNEL_SHUFFLE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
CONCATENATION (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
DEPTH_TO_SPACE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
DEPTHWISE_CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
DEQUANTIZE (FLOAT32 (output only), QUANT8_ASYMM and QUANT8_ASYMM_SIGNED (input only))
-DIV (FLOAT32, QUANT8_ASYMM)
-ELU (FLOAT32, QUANT8_ASYMM)
-EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+DIV (FLOAT32, FLOAT16, INT32)
+ELU (FLOAT32, FLOAT16, QUANT8_ASYMM)
+EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
EXP (FLOAT32, FLOAT16)
-EXPAND_DIMS (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+EXPAND_DIMS (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
FILL (FLOAT32, FLOAT16, INT32)
-FLOOR (FLOAT32)
+FLOOR (FLOAT32, FLOAT16)
FULLY_CONNECTED (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-GREATER (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-GREATER_EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-GROUPED_CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-HARD_SWISH (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-INSTANCE_NORMALIZATION (FLOAT32)
+GATHER (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+GREATER (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+GREATER_EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+GROUPED_CONV_2D (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+HARD_SWISH (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+INSTANCE_NORMALIZATION (FLOAT32, FLOAT16)
L2_NORMALIZATION (FLOAT32)
-L2_POOL_2D (FLOAT32, QUANT8_ASYMM)
-LESS (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-LESS_EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+L2_POOL_2D (FLOAT32, FLOAT16)
+LESS (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+LESS_EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
LOCAL_RESPONSE_NORMALIZATION (FLOAT32)
+LOG (FLOAT32, FLOAT16)
LOGICAL_AND (BOOL8)
LOGICAL_NOT (BOOL8)
LOGICAL_OR (BOOL8)
-LOGISTIC (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-LOG_SOFTMAX (FLOAT32)
+LOGISTIC (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+LOG_SOFTMAX (FLOAT32, FLOAT16)
LSTM (FLOAT32)
-MAXIMUM (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MAX_POOL_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MEAN (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MINIMUM (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MUL (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-NEG (FLOAT32)
-NOT_EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MAXIMUM (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MAX_POOL_2D (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MEAN (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MINIMUM (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MUL (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+NEG (FLOAT32, FLOAT16)
+NOT_EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
PAD (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-PAD_V2 (FLOAT32, FLOAT16, QUANT8_ASYMM)
-PRELU (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+PAD_V2 (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+POW (FLOAT32, FLOAT16)
+PRELU (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
QUANTIZE (FLOAT32 (input only), QUANT8_ASYMM and QUANT8_ASYMM_SIGNED (output only))
QUANTIZED_16BIT_LSTM (QUANT8_ASYMM)
QUANTIZED_LSTM (QUANT8_ASYMM)
-RELU (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-RELU1 (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-RELU6 (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RANK (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+REDUCE_MAX (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+REDUCE_MIN (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+REDUCE_PROD (FLOAT32, FLOAT16)
+REDUCE_SUM (FLOAT32, FLOAT16)
+RELU (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RELU1 (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RELU6 (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
RESHAPE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-RESIZE_BILINEAR (FLOAT32, QUANT8_ASYMM)
-RESIZE_NEAREST_NEIGHBOR (FLOAT32, QUANT8_ASYMM)
-RSQRT (FLOAT32)
+RESIZE_BILINEAR (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RESIZE_NEAREST_NEIGHBOR (FLOAT32, FLOAT16, QUANT8_ASYMM)
+RSQRT (FLOAT32, FLOAT16)
+SIN (FLOAT32, FLOAT16)
SOFTMAX (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-SPACE_TO_BATCH_ND (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+SPACE_TO_BATCH_ND (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
SPACE_TO_DEPTH (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-SQRT (FLOAT32)
+SPLIT (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+SQRT (FLOAT32, FLOAT16)
SQUEEZE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
STRIDED_SLICE (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-SUB (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-TANH (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+SUB (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+TANH (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+TILE (All data types)
TRANSPOSE (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
TRANSPOSE_CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+UNIDIRECTIONAL_SEQUENCE_LSTM (FLOAT32, FLOAT16)
Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework
appropriately and the framework implements those operations using a CPU implementation.
NOTE: By convention, only those tensor types have been listed above, which are fully supported across all
ArmNN backends.
- - FLOAT16 input tensors are partially supported on most HAL 1.2 operators on the GpuAcc and
- CpuRef backends, however not on CpuAcc.
\ No newline at end of file
+ - FLOAT16 input tensors are partially supported on most HAL 1.2 and 1.3 operators on the GpuAcc and
+ CpuRef backends, however not on CpuAcc.
diff --git a/NnapiSupport.txt.license b/NnapiSupport.txt.license
new file mode 100644
index 00000000..739dc2d8
--- /dev/null
+++ b/NnapiSupport.txt.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/README.md.license b/README.md.license
new file mode 100644
index 00000000..c3a88230
--- /dev/null
+++ b/README.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018-2021 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/RequestThread.cpp b/RequestThread.cpp
index 927af922..783e351d 100644
--- a/RequestThread.cpp
+++ b/RequestThread.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -17,8 +17,6 @@
#include "ArmnnPreparedModel_1_3.hpp"
#endif
-#include <armnn/utility/Assert.hpp>
-
#include <log/log.h>
using namespace android;
@@ -134,8 +132,7 @@ void RequestThread<PreparedModel, HalVersion, CallbackContext>::Process()
default:
// this should be unreachable
- ALOGE("RequestThread::Process() - invalid message type");
- ARMNN_ASSERT_MSG(false, "ArmNN: RequestThread: invalid message type");
+ throw armnn::RuntimeException("ArmNN: RequestThread: invalid message type");
}
}
}
diff --git a/RequestThread_1_3.cpp b/RequestThread_1_3.cpp
index 59fa70ed..6133e290 100644
--- a/RequestThread_1_3.cpp
+++ b/RequestThread_1_3.cpp
@@ -1,15 +1,12 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#define LOG_TAG "ArmnnDriver"
-#include "RequestThread_1_3.hpp"
-
#include "ArmnnPreparedModel_1_3.hpp"
-
-#include <armnn/utility/Assert.hpp>
+#include "RequestThread_1_3.hpp"
#include <log/log.h>
@@ -178,8 +175,7 @@ void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::Process()
default:
// this should be unreachable
- ALOGE("RequestThread_1_3::Process() - invalid message type");
- ARMNN_ASSERT_MSG(false, "ArmNN: RequestThread_1_3: invalid message type");
+ throw armnn::RuntimeException("ArmNN: RequestThread_1_3: invalid message type");
}
}
}
diff --git a/SECURITY.md.license b/SECURITY.md.license
new file mode 100644
index 00000000..344cf337
--- /dev/null
+++ b/SECURITY.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/Utils.cpp b/Utils.cpp
index 930c2b24..58356ac1 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -9,14 +9,12 @@
#include "Half.hpp"
#include <armnnSerializer/ISerializer.hpp>
+#include <armnnUtils/Filesystem.hpp>
#include <armnnUtils/Permute.hpp>
#include <armnn/Utils.hpp>
-#include <armnn/utility/Assert.hpp>
-#include <Filesystem.hpp>
#include <log/log.h>
-#include <cassert>
#include <cerrno>
#include <cinttypes>
#include <sstream>
@@ -31,44 +29,39 @@ namespace armnn_driver
{
const armnn::PermutationVector g_DontPermute{};
-namespace
-{
-
-void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorShape& inTensorShape, const void* input,
- void* output, size_t dataTypeSize, const armnn::PermutationVector& mappings)
-{
- assert(inTensorShape.GetNumDimensions() == 4U);
-
- armnnUtils::Permute(armnnUtils::Permuted(inTensorShape, mappings), mappings, input, output, dataTypeSize);
-}
-
-} // anonymous namespace
-
-void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void* input, void* output,
+void SwizzleAndroidNn4dTensorToArmNn(armnn::TensorInfo& tensorInfo, const void* input, void* output,
const armnn::PermutationVector& mappings)
{
- assert(tensor.GetNumDimensions() == 4U);
-
- armnn::DataType dataType = tensor.GetDataType();
+ if (tensorInfo.GetNumDimensions() != 4U)
+ {
+ throw armnn::InvalidArgumentException("NumDimensions must be 4");
+ }
+ armnn::DataType dataType = tensorInfo.GetDataType();
switch (dataType)
{
case armnn::DataType::Float16:
case armnn::DataType::Float32:
case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
case armnn::DataType::QSymmS8:
case armnn::DataType::QAsymmS8:
- SwizzleAndroidNn4dTensorToArmNn(tensor.GetShape(), input, output, armnn::GetDataTypeSize(dataType), mappings);
+ // First swizzle tensor info
+ tensorInfo = armnnUtils::Permuted(tensorInfo, mappings);
+ // Then swizzle tensor data
+ armnnUtils::Permute(tensorInfo.GetShape(), mappings, input, output, armnn::GetDataTypeSize(dataType));
break;
default:
- ALOGW("Unknown armnn::DataType for swizzling");
- assert(0);
+ throw armnn::InvalidArgumentException("Unknown DataType for swizzling");
}
}
void* GetMemoryFromPool(V1_0::DataLocation location, const std::vector<android::nn::RunTimePoolInfo>& memPools)
{
// find the location within the pool
- assert(location.poolIndex < memPools.size());
+ if (location.poolIndex >= memPools.size())
+ {
+ throw armnn::InvalidArgumentException("The poolIndex is greater than the memPools size.");
+ }
const android::nn::RunTimePoolInfo& memPool = memPools[location.poolIndex];
@@ -107,20 +100,19 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand)
}
else
{
- bool dimensionsSpecificity[5] = { true, true, true, true, true };
- int count = 0;
- std::for_each(operand.dimensions.data(),
- operand.dimensions.data() + operand.dimensions.size(),
- [&](const unsigned int val)
- {
- if (val == 0)
- {
- dimensionsSpecificity[count] = false;
- }
- count++;
- });
+ std::vector<unsigned char> dimensionsSpecificity(operand.dimensions.size(), true);
- TensorShape tensorShape(operand.dimensions.size(), operand.dimensions.data(), dimensionsSpecificity);
+ for (unsigned int i = 0; i < static_cast<unsigned int>(operand.dimensions.size()); ++i)
+ {
+ auto dim = operand.dimensions[i];
+ if (dim == 0)
+ {
+ dimensionsSpecificity[i] = false;
+ }
+ }
+ TensorShape tensorShape(operand.dimensions.size(),
+ operand.dimensions.data(),
+ reinterpret_cast<const bool *>(dimensionsSpecificity.data()));
ret = TensorInfo(tensorShape, type);
}
@@ -195,9 +187,10 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
if (perChannel)
{
- // ExtraParams is expected to be of type channelQuant
- ARMNN_ASSERT(operand.extraParams.getDiscriminator() ==
- V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);
+ if (operand.extraParams.getDiscriminator() != V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant)
+ {
+ throw armnn::InvalidArgumentException("ExtraParams is expected to be of type channelQuant");
+ }
auto perAxisQuantParams = operand.extraParams.channelQuant();
@@ -296,9 +289,10 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand)
if (perChannel)
{
// ExtraParams is expected to be of type channelQuant
- ARMNN_ASSERT(operand.extraParams.getDiscriminator() ==
- V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);
-
+ if (operand.extraParams.getDiscriminator() != V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant)
+ {
+ throw armnn::InvalidArgumentException("ExtraParams is expected to be of type channelQuant");
+ }
auto perAxisQuantParams = operand.extraParams.channelQuant();
ret.SetQuantizationScales(perAxisQuantParams.scales);
@@ -340,39 +334,27 @@ std::string GetOperandSummary(const V1_3::Operand& operand)
#endif
-using DumpElementFunction = void (*)(const armnn::ConstTensor& tensor,
+template <typename TensorType>
+using DumpElementFunction = void (*)(const TensorType& tensor,
unsigned int elementIndex,
std::ofstream& fileStream);
namespace
{
-template <typename ElementType, typename PrintableType = ElementType>
-void DumpTensorElement(const armnn::ConstTensor& tensor, unsigned int elementIndex, std::ofstream& fileStream)
+template <typename TensorType, typename ElementType, typename PrintableType = ElementType>
+void DumpTensorElement(const TensorType& tensor, unsigned int elementIndex, std::ofstream& fileStream)
{
const ElementType* elements = reinterpret_cast<const ElementType*>(tensor.GetMemoryArea());
- fileStream << static_cast<PrintableType>(elements[elementIndex]) << ",";
+ fileStream << static_cast<PrintableType>(elements[elementIndex]) << " ";
}
-constexpr const char* MemoryLayoutString(const armnn::ConstTensor& tensor)
-{
- const char* str = "";
-
- switch (tensor.GetNumDimensions())
- {
- case 4: { str = "(BHWC) "; break; }
- case 3: { str = "(HWC) "; break; }
- case 2: { str = "(HW) "; break; }
- default: { str = ""; break; }
- }
-
- return str;
-}
} // namespace
+template <typename TensorType>
void DumpTensor(const std::string& dumpDir,
const std::string& requestName,
const std::string& tensorName,
- const armnn::ConstTensor& tensor)
+ const TensorType& tensor)
{
// The dump directory must exist in advance.
fs::path dumpPath = dumpDir;
@@ -387,38 +369,38 @@ void DumpTensor(const std::string& dumpDir,
return;
}
- DumpElementFunction dumpElementFunction = nullptr;
+ DumpElementFunction<TensorType> dumpElementFunction = nullptr;
switch (tensor.GetDataType())
{
case armnn::DataType::Float32:
{
- dumpElementFunction = &DumpTensorElement<float>;
+ dumpElementFunction = &DumpTensorElement<TensorType, float>;
break;
}
case armnn::DataType::QAsymmU8:
{
- dumpElementFunction = &DumpTensorElement<uint8_t, uint32_t>;
+ dumpElementFunction = &DumpTensorElement<TensorType, uint8_t, uint32_t>;
break;
}
case armnn::DataType::Signed32:
{
- dumpElementFunction = &DumpTensorElement<int32_t>;
+ dumpElementFunction = &DumpTensorElement<TensorType, int32_t>;
break;
}
case armnn::DataType::Float16:
{
- dumpElementFunction = &DumpTensorElement<armnn::Half>;
+ dumpElementFunction = &DumpTensorElement<TensorType, armnn::Half>;
break;
}
case armnn::DataType::QAsymmS8:
{
- dumpElementFunction = &DumpTensorElement<int8_t, int32_t>;
+ dumpElementFunction = &DumpTensorElement<TensorType, int8_t, int32_t>;
break;
}
case armnn::DataType::Boolean:
{
- dumpElementFunction = &DumpTensorElement<bool>;
+ dumpElementFunction = &DumpTensorElement<TensorType, bool>;
break;
}
default:
@@ -430,55 +412,53 @@ void DumpTensor(const std::string& dumpDir,
if (dumpElementFunction != nullptr)
{
const unsigned int numDimensions = tensor.GetNumDimensions();
+ const armnn::TensorShape shape = tensor.GetShape();
- const unsigned int batch = (numDimensions == 4) ? tensor.GetShape()[numDimensions - 4] : 1;
-
- const unsigned int height = (numDimensions >= 3)
- ? tensor.GetShape()[numDimensions - 3]
- : (numDimensions >= 2) ? tensor.GetShape()[numDimensions - 2] : 1;
-
- const unsigned int width = (numDimensions >= 3)
- ? tensor.GetShape()[numDimensions - 2]
- : (numDimensions >= 1) ? tensor.GetShape()[numDimensions - 1] : 0;
-
- const unsigned int channels = (numDimensions >= 3) ? tensor.GetShape()[numDimensions - 1] : 1;
-
+ if (!shape.AreAllDimensionsSpecified())
+ {
+ fileStream << "Cannot dump tensor elements: not all dimensions are specified" << std::endl;
+ return;
+ }
fileStream << "# Number of elements " << tensor.GetNumElements() << std::endl;
- fileStream << "# Dimensions " << MemoryLayoutString(tensor);
- fileStream << "[" << tensor.GetShape()[0];
- for (unsigned int d = 1; d < numDimensions; d++)
+
+ if (numDimensions == 0)
{
- fileStream << "," << tensor.GetShape()[d];
+ fileStream << "# Shape []" << std::endl;
+ return;
+ }
+ fileStream << "# Shape [" << shape[0];
+ for (unsigned int d = 1; d < numDimensions; ++d)
+ {
+ fileStream << "," << shape[d];
}
fileStream << "]" << std::endl;
+ fileStream << "Each line contains the data of each of the elements of dimension0. In NCHW and NHWC, each line"
+ " will be a batch" << std::endl << std::endl;
- for (unsigned int e = 0, b = 0; b < batch; ++b)
+ // Split will create a new line after all elements of the first dimension
+ // (in a 4, 3, 2, 3 tensor, there will be 4 lines of 18 elements)
+ unsigned int split = 1;
+ if (numDimensions == 1)
{
- if (numDimensions >= 4)
+ split = shape[0];
+ }
+ else
+ {
+ for (unsigned int i = 1; i < numDimensions; ++i)
{
- fileStream << "# Batch " << b << std::endl;
+ split *= shape[i];
}
- for (unsigned int c = 0; c < channels; c++)
+ }
+
+ // Print all elements in the tensor
+ for (unsigned int elementIndex = 0; elementIndex < tensor.GetNumElements(); ++elementIndex)
+ {
+ (*dumpElementFunction)(tensor, elementIndex, fileStream);
+
+ if ( (elementIndex + 1) % split == 0 )
{
- if (numDimensions >= 3)
- {
- fileStream << "# Channel " << c << std::endl;
- }
- for (unsigned int h = 0; h < height; h++)
- {
- for (unsigned int w = 0; w < width; w++, e += channels)
- {
- (*dumpElementFunction)(tensor, e, fileStream);
- }
- fileStream << std::endl;
- }
- e -= channels - 1;
- if (c < channels)
- {
- e -= ((height * width) - 1) * channels;
- }
+ fileStream << std::endl;
}
- fileStream << std::endl;
}
fileStream << std::endl;
}
@@ -494,6 +474,17 @@ void DumpTensor(const std::string& dumpDir,
}
}
+
+template void DumpTensor<armnn::ConstTensor>(const std::string& dumpDir,
+ const std::string& requestName,
+ const std::string& tensorName,
+ const armnn::ConstTensor& tensor);
+
+template void DumpTensor<armnn::Tensor>(const std::string& dumpDir,
+ const std::string& requestName,
+ const std::string& tensorName,
+ const armnn::Tensor& tensor);
+
void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
const std::string& dumpDir,
armnn::NetworkId networkId,
@@ -511,7 +502,11 @@ void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
return;
}
- ARMNN_ASSERT(profiler);
+ if (!profiler)
+ {
+ ALOGW("profiler was null");
+ return;
+ }
// Set the name of the output profiling file.
fs::path dumpPath = dumpDir;
@@ -571,37 +566,59 @@ std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimize
return fileName;
}
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir)
+std::string SerializeNetwork(const armnn::INetwork& network,
+ const std::string& dumpDir,
+ std::vector<uint8_t>& dataCacheData,
+ bool dataCachingActive)
{
std::string fileName;
- // The dump directory must exist in advance.
+ bool bSerializeToFile = true;
if (dumpDir.empty())
{
- return fileName;
+ bSerializeToFile = false;
}
-
- std::string timestamp = GetFileTimestamp();
- if (timestamp.empty())
+ else
+ {
+ std::string timestamp = GetFileTimestamp();
+ if (timestamp.empty())
+ {
+ bSerializeToFile = false;
+ }
+ }
+ if (!bSerializeToFile && !dataCachingActive)
{
return fileName;
}
auto serializer(armnnSerializer::ISerializer::Create());
-
// Serialize the Network
serializer->Serialize(network);
+ if (dataCachingActive)
+ {
+ std::stringstream stream;
+ auto serialized = serializer->SaveSerializedToStream(stream);
+ if (serialized)
+ {
+ std::string const serializedString{stream.str()};
+ std::copy(serializedString.begin(), serializedString.end(), std::back_inserter(dataCacheData));
+ }
+ }
- // Set the name of the output .armnn file.
- fs::path dumpPath = dumpDir;
- fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
- fileName = tempFilePath.string();
-
- // Save serialized network to a file
- std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
- bool serialized = serializer->SaveSerializedToStream(serializedFile);
- if (!serialized)
+ if (bSerializeToFile)
{
- ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+ // Set the name of the output .armnn file.
+ fs::path dumpPath = dumpDir;
+ std::string timestamp = GetFileTimestamp();
+ fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
+ fileName = tempFilePath.string();
+
+ // Save serialized network to a file
+ std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
+ auto serialized = serializer->SaveSerializedToStream(serializedFile);
+ if (!serialized)
+ {
+ ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+ }
}
return fileName;
}
@@ -629,6 +646,53 @@ bool AreDynamicTensorsSupported()
#endif
}
+bool isQuantizedOperand(const V1_0::OperandType& operandType)
+{
+ if (operandType == V1_0::OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2
+bool isQuantizedOperand(const V1_2::OperandType& operandType)
+{
+ if (operandType == V1_2::OperandType::TENSOR_QUANT8_ASYMM ||
+ operandType == V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+ operandType == V1_2::OperandType::TENSOR_QUANT8_SYMM ||
+ operandType == V1_2::OperandType::TENSOR_QUANT16_SYMM )
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+#endif
+
+#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3
+bool isQuantizedOperand(const V1_3::OperandType& operandType)
+{
+ if (operandType == V1_3::OperandType::TENSOR_QUANT8_ASYMM ||
+ operandType == V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+ operandType == V1_3::OperandType::TENSOR_QUANT8_SYMM ||
+ operandType == V1_3::OperandType::TENSOR_QUANT16_SYMM ||
+ operandType == V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+#endif
+
std::string GetFileTimestamp()
{
// used to get a timestamp to name diagnostic files (the ArmNN serialized graph
@@ -702,4 +766,67 @@ void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools)
#endif
}
}
+
+size_t GetSize(const V1_0::Request& request, const V1_0::RequestArgument& requestArgument)
+{
+ return request.pools[requestArgument.location.poolIndex].size();
+}
+
+#ifdef ARMNN_ANDROID_NN_V1_3
+size_t GetSize(const V1_3::Request& request, const V1_0::RequestArgument& requestArgument)
+{
+ if (request.pools[requestArgument.location.poolIndex].getDiscriminator() ==
+ V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory)
+ {
+ return request.pools[requestArgument.location.poolIndex].hidlMemory().size();
+ }
+ else
+ {
+ return 0;
+ }
+}
+#endif
+
+template <typename ErrorStatus, typename Request>
+ErrorStatus ValidateRequestArgument(const Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString)
+{
+ if (requestArgument.location.poolIndex >= request.pools.size())
+ {
+ std::string err = fmt::format("Invalid {} pool at index {} the pool index is greater than the number "
+ "of available pools {}",
+ descString, requestArgument.location.poolIndex, request.pools.size());
+ ALOGE(err.c_str());
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+ const size_t size = GetSize(request, requestArgument);
+ size_t totalLength = tensorInfo.GetNumBytes();
+
+ if (static_cast<size_t>(requestArgument.location.offset) + totalLength > size)
+ {
+ std::string err = fmt::format("Invalid {} pool at index {} the offset {} and length {} are greater "
+ "than the pool size {}", descString, requestArgument.location.poolIndex,
+ requestArgument.location.offset, totalLength, size);
+ ALOGE(err.c_str());
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+ return ErrorStatus::NONE;
+}
+
+template V1_0::ErrorStatus ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(
+ const V1_0::Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString);
+
+#ifdef ARMNN_ANDROID_NN_V1_3
+template V1_3::ErrorStatus ValidateRequestArgument<V1_3::ErrorStatus, V1_3::Request>(
+ const V1_3::Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString);
+#endif
+
} // namespace armnn_driver
diff --git a/Utils.hpp b/Utils.hpp
index f68747b0..81be984c 100644
--- a/Utils.hpp
+++ b/Utils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -11,6 +11,8 @@
#include <NeuralNetworks.h>
#include <Utils.h>
+#include <fmt/format.h>
+
#include <vector>
#include <string>
#include <fstream>
@@ -60,7 +62,7 @@ public:
};
/// Swizzles tensor data in @a input according to the dimension mappings.
-void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void* input, void* output,
+void SwizzleAndroidNn4dTensorToArmNn(armnn::TensorInfo& tensor, const void* input, void* output,
const armnn::PermutationVector& mappings);
/// Returns a pointer to a specific location in a pool
@@ -70,22 +72,25 @@ void* GetMemoryFromPool(V1_0::DataLocation location,
/// Can throw UnsupportedOperand
armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand);
-#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2
-armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand);
-#endif
-
-#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3
-armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand);
-#endif
-
std::string GetOperandSummary(const V1_0::Operand& operand);
+// Returns true for any quantized data type, false for the rest.
+bool isQuantizedOperand(const V1_0::OperandType& operandType);
+
#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2
+armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand);
+
std::string GetOperandSummary(const V1_2::Operand& operand);
+
+bool isQuantizedOperand(const V1_2::OperandType& operandType);
#endif
#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3
+armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand);
+
std::string GetOperandSummary(const V1_3::Operand& operand);
+
+bool isQuantizedOperand(const V1_3::OperandType& operandType);
#endif
template <typename HalModel>
@@ -123,10 +128,11 @@ std::string GetModelSummary(const HalModel& model)
return result.str();
}
+template <typename TensorType>
void DumpTensor(const std::string& dumpDir,
const std::string& requestName,
const std::string& tensorName,
- const armnn::ConstTensor& tensor);
+ const TensorType& tensor);
void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
const std::string& dumpDir,
@@ -136,7 +142,10 @@ void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork,
const std::string& dumpDir);
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir);
+std::string SerializeNetwork(const armnn::INetwork& network,
+ const std::string& dumpDir,
+ std::vector<uint8_t>& dataCacheData,
+ bool dataCachingActive = true);
void RenameExportedFiles(const std::string& existingSerializedFileName,
const std::string& existingDotFileName,
@@ -187,4 +196,9 @@ inline V1_2::OutputShape ComputeShape(const armnn::TensorInfo& info)
void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools);
+template <typename ErrorStatus, typename Request>
+ErrorStatus ValidateRequestArgument(const Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString);
} // namespace armnn_driver
diff --git a/android.hardware.neuralnetworks@1.0-service-armnn.rc.license b/android.hardware.neuralnetworks@1.0-service-armnn.rc.license
new file mode 100644
index 00000000..ce68ac48
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.0-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/android.hardware.neuralnetworks@1.1-service-armnn.rc.license b/android.hardware.neuralnetworks@1.1-service-armnn.rc.license
new file mode 100644
index 00000000..ce68ac48
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.1-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/android.hardware.neuralnetworks@1.2-service-armnn.rc.license b/android.hardware.neuralnetworks@1.2-service-armnn.rc.license
new file mode 100644
index 00000000..8cbac672
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.2-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2019-2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/android.hardware.neuralnetworks@1.3-service-armnn.rc.license b/android.hardware.neuralnetworks@1.3-service-armnn.rc.license
new file mode 100644
index 00000000..344cf337
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.3-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/docs/FAQ.md b/docs/FAQ.md
index bd79bb04..9b6f099b 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -17,39 +17,6 @@ https://android.googlesource.com/platform/test/vts-testcase/hal/+/f74899c6c09b52
An acceptable workaround is to increase the timeout defined in AndroidTest.xml, in a similar way to https://android.googlesource.com/platform/test/vts-testcase/hal/+/f74899c6c09b52703e6db0323dffb4ae52539db4.
-Problems seen when trying to build the android-nn-driver obtained from GitHub
------------------------------------------------------------------------------
-
-Some users have encountered difficulties when attempting to build copies of the android-nn-driver obtained from GitHub. The build reports missing module source paths from armnn, clframework, flatbuffers-1.12.0 or boost_1_64_0.
-These errors can look
-like this:
-
-'error: vendor/arm/android-nn-driver/Android.bp:45:1: variant "android_arm64_armv7": module "armnn-arm_compute" "module source path "vendor/arm/android-nn-driver/clframework/build/android-arm64v8a/src/core/CL" does not exist'
-
-These errors are due to missing dependencies or incompatiblities between the android-nn-driver and armnn or clframework versions. The android-nn-driver requires boost_1_64_0 to build unit tests. The versions of android-nn-driver, armnn and clframework will have to match for them to work together. For example, the 19.08 version of android-nn-driver, clframework and armnn will work together but none of them will work with earlier or later versions of the others.
-
-In order to ensure that the correct versions of flatbuffers, boost, armnn and the clframework are obtained you can do the following:
-
-1. Delete or move any flatbuffers, boost, armnn or clframework directories from the android-nn-driver directory.
-2. Run the setup.sh script in the android-nn-driver directory.
-
-This will download the correct versions of flatbuffers, boost, armnn and the clframework and the android-nn-driver should build
-correctly. Alternatively you can go to the GitHub pages for android-nn-driver, armnn and computelibrary (clframework) and download versions with the same release tag.
-
-As an example, for 20.05 these would be:
-
-https://github.com/ARM-software/android-nn-driver/tree/v20.05
-https://github.com/ARM-software/armnn/tree/v20.05
-https://github.com/ARM-software/computelibrary/tree/v20.05
-
-The correct version of boost (1_64_0) can be downloaded from:
-
-https://www.boost.org/
-
-The correct version of flatbuffers (1.12.0) can be downloaded from:
-
-https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz
-
Instance Normalization test failures
------------------------------------
@@ -58,7 +25,7 @@ There is a known issue in the Android NNAPI implementation of Instance Normaliza
VTS and CTS test failures
-------------------------
-With the release of the Android 10 R2 CTS some errors and crashes were discovered in the 19.08 and 19.11 releases of armnn, the android-nn-driver and ComputeLibrary. 19.08.01 and 19.11.01 releases of armnn, the android-nn-driver and ComputeLibrary were prepared that fix all these issues on CpuAcc and GpuAcc. If using 19.08 or 19.11 we recommend that you upgrade to the 19.08.01 or 19.11.01 releases. These issues have also been fixed in the 20.02 and later releases of armnn, the android-nn-driver and ComputeLibrary.
+With the Android 10 R2 CTS, some errors and crashes were discovered in the 19.08 and 19.11 releases of armnn, the android-nn-driver and ComputeLibrary. The 19.08.01 and 19.11.01 releases of armnn, the android-nn-driver and ComputeLibrary fix all of these issues on CpuAcc and GpuAcc. If you are using 19.08 or 19.11, we recommend that you upgrade to the latest releases.
These fixes also required patches to be made to the Android Q test framework. You may encounter CTS and VTS test failures when attempting to build copies of the android-nn-driver against older versions of Android Q.
@@ -75,4 +42,4 @@ In order to fix these failures you will have to update to a version of Android Q
The Android 10 R3 CTS that can be downloaded from https://source.android.com/compatibility/cts/downloads contains all these patches.
-There is a known issue that even with these patches CTS tests "TestRandomGraph/RandomGraphTest#LargeGraph_TENSOR_FLOAT16_Rank3/41" and "TestRandomGraph/RandomGraphTest#LargeGraph_TENSOR_FLOAT16_Rank2/20 " will still fail on CpuRef. These failures are caused by a LogSoftmax layer followed by a Floor layer which blows up the slight difference between fp16 to fp32. This issue only affects CpuRef with Android Q. These tests are not failing for Android R.
\ No newline at end of file
+There is a known issue that even with these patches the CTS tests "TestRandomGraph/RandomGraphTest#LargeGraph_TENSOR_FLOAT16_Rank3/41" and "TestRandomGraph/RandomGraphTest#LargeGraph_TENSOR_FLOAT16_Rank2/20" will still fail on CpuRef. These failures are caused by a LogSoftmax layer followed by a Floor layer, which blows up the slight difference between fp16 and fp32. This issue only affects CpuRef with Android Q. These tests do not fail on Android R.
diff --git a/docs/FAQ.md.license b/docs/FAQ.md.license
new file mode 100644
index 00000000..68a3f516
--- /dev/null
+++ b/docs/FAQ.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/docs/IntegratorGuide.md b/docs/IntegratorGuide.md
index 82177f72..55c9b9a7 100644
--- a/docs/IntegratorGuide.md
+++ b/docs/IntegratorGuide.md
@@ -5,10 +5,11 @@ This document describes how to integrate the Arm NN Android NNAPI driver into an
### Prerequisites
-1. Android source tree for Android P (we have tested against Android P version 9.0.0_r3) , in the directory `<ANDROID_ROOT>`
-2. Android source tree for Android Q (we have tested against Android Q version 10.0.0_r39), in the directory `<ANDROID_ROOT>`
+1. Android source tree for Android Q (we have tested against Android Q version 10.0.0_r39), in the directory `<ANDROID_ROOT>`
2. Android source tree for Android R (we have tested against Android R version 11.0.0_r3), in the directory `<ANDROID_ROOT>`
-3. Mali OpenCL driver integrated into the Android source tree
+3. Android source tree for Android S (we have tested against Android S version 12.0.0_r1), in the directory `<ANDROID_ROOT>`
+4. Android source tree for Android T (we have tested against Android T pre-release tag - TP1A.220624.003), in the directory `<ANDROID_ROOT>`
+5. Mali OpenCL driver integrated into the Android source tree
### Procedure
@@ -20,11 +21,7 @@ To update the build environment, add to the contents of the variable `PRODUCT_PA
within the device-specific makefile that is located in the `<ANDROID_ROOT>/device/<manufacturer>/<product>`
directory. This file is normally called `device.mk`:
-`Android.mk` contains the module definition of all versions (1.0, 1.1, 1.2 and 1.3) of the Arm NN driver.
-For Android P, a new version of NN API is available (1.1), thus the following should be added to `device.mk` instead:
-<pre>
-PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.1-service-armnn
-</pre>
+`Android.mk` contains the module definition of all versions (1.1, 1.2 and 1.3) of the Arm NN driver.
For Android Q, a new version of the NN API is available (1.2),
thus the following should be added to `device.mk` instead:
@@ -32,7 +29,7 @@ thus the following should be added to `device.mk` instead:
PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.2-service-armnn
</pre>
-For Android R, new version of the NN API is available (1.3),
+For Android R, S and T, a new version of the NN API is available (1.3),
thus the following should be added to `device.mk` instead:
<pre>
PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.3-service-armnn
@@ -44,34 +41,25 @@ ARMNN_COMPUTE_NEON_ENABLE or ARMNN_REF_ENABLE in `device.mk`:
ARMNN_COMPUTE_CL_ENABLE := 1
</pre>
-For Android P, Q and R the vendor manifest.xml requires the Neural Network HAL information.
-For Android P use HAL version 1.1 as below. For Android Q substitute 1.2 where necessary. For Android R substitute 1.3 where necessary.
+For all Android versions, the vendor manifest.xml requires the Neural Network HAL information.
+For Android Q, use HAL version 1.2 as below. For later Android versions, substitute 1.3 where necessary.
```xml
<hal format="hidl">
<name>android.hardware.neuralnetworks</name>
<transport>hwbinder</transport>
- <version>1.1</version>
+ <version>1.2</version>
<interface>
<name>IDevice</name>
<instance>armnn</instance>
</interface>
- <fqname>@1.1::IDevice/armnn</fqname>
+ <fqname>@1.2::IDevice/armnn</fqname>
</hal>
```
-4. Build Android as normal, i.e. run `make` in `<ANDROID_ROOT>`
+4. Build Android as normal (https://source.android.com/setup/build/building)
5. To confirm that the Arm NN driver has been built, check for the driver service executable at
-Android P
-<pre>
-<ANDROID_ROOT>/out/target/product/<product>/system/vendor/bin/hw
-</pre>
-For example, if the Arm NN driver has been built with the NN API 1.1, check for the following file:
-<pre>
-<ANDROID_ROOT>/out/target/product/<product>/system/vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn
-</pre>
-
-Android Q and later has a different path:
+Android Q and later
<pre>
<ANDROID_ROOT>/out/target/product/<product>/vendor/bin/hw
</pre>
@@ -81,9 +69,8 @@ Android Q and later has a different path:
1. Run the Arm NN driver service executable in the background.
Use the corresponding version of the driver for the Android version you are running.
i.e
-android.hardware.neuralnetworks@1.1-service-armnn for Android P,
android.hardware.neuralnetworks@1.2-service-armnn for Android Q and
-android.hardware.neuralnetworks@1.3-service-armnn for Android R
+android.hardware.neuralnetworks@1.3-service-armnn for Android R, S and T
<pre>
It is also possible to use a specific backend by using the -c option.
The following is an example of using the CpuAcc backend for Android Q:
@@ -107,14 +94,104 @@ Rapid means that only 3 lws values should be tested for each kernel.
The recommended way of using it with Arm NN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation:
1. Run the Arm NN driver service executable in tuning mode. The path to the tuning data must be writable by the service.
-The following examples assume that the 1.1 version of the driver is being used:
+The following examples assume that the 1.2 version of the driver is being used:
<pre>
-adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; --cl-tuned-parameters-mode UpdateTunedParameters --cl-tuning-level exhaustive &
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.2-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; --cl-tuned-parameters-mode UpdateTunedParameters --cl-tuning-level exhaustive &
</pre>
2. Run a representative set of Android NNAPI testing loads. In this mode of operation, each NNAPI workload will be slow the first time it is executed, as the tuning parameters are being selected. Subsequent executions will use the tuning data which has been generated.
3. Stop the service.
4. Deploy the tuned parameters file to a location readable by the Arm NN driver service (for example, to a location within /vendor/etc).
5. During normal operation, pass the location of the tuning data to the driver service (this would normally be done by passing arguments via Android init in the service .rc definition):
<pre>
-adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; &
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.2-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; &
</pre>
+
+### Specifying the Capabilities for the Driver
+
+The Android NNAPI framework provides a means for a driver to report its Capabilities. These are relevant when there are multiple drivers on a device and the NNAPI needs to choose one to use for a model. The Android NNAPI documentation gives an overview of Capabilities and how they are used at https://source.android.com/docs/core/interaction/neural-networks
+
+These values are hardware dependent and, as we cannot know how any specific hardware is configured or what kind of values the creators of other drivers might use, we leave it up to the Integrator to specify them. The Android documentation linked above also provides some guidelines on measuring performance when generating these values.
+
+As the Arm NN driver service initialises, it looks for system properties containing the performance values to return when the NNAPI service requests the driver's Capabilities. The properties must all be 32-bit float values and must specify both execution performance and power usage (in some circumstances Android may prefer low power consumption over high performance).
+
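+For quick experimentation it should be possible, on a userdebug or rooted build, to set these properties from a shell before starting the driver service. The property names below are taken from the lists that follow; the values are purely illustrative placeholders and must be replaced with figures measured on the target hardware:
+<pre>
+adb root
+adb shell setprop ArmNN.float32Performance.execTime 0.8
+adb shell setprop ArmNN.float32Performance.powerUsage 1.2
+</pre>
+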
+As each new HAL version was introduced, the number of properties increased. The following lists, for each HAL version, the system properties that are looked up when the driver starts.
+
+#### HAL 1.0
+
+Initially, the HAL 1.0 service only supported Float32 and quantized int8.
+
+* ArmNN.float32Performance.execTime
+* ArmNN.float32Performance.powerUsage
+* ArmNN.quantized8Performance.execTime
+* ArmNN.quantized8Performance.powerUsage
+
+#### HAL 1.1
+
+HAL 1.1 added a performance setting for relaxedFloat32toFloat16Performance.
+
+* ArmNN.float32Performance.execTime
+* ArmNN.float32Performance.powerUsage
+* ArmNN.quantized8Performance.execTime
+* ArmNN.quantized8Performance.powerUsage
+* ArmNN.relaxedFloat32toFloat16Performance.execTime
+* ArmNN.relaxedFloat32toFloat16Performance.powerUsage
+
+#### HAL 1.2
+
+HAL 1.2 added support for a number of new operand types.
+
+* ArmNN.relaxedFloat32toFloat16Performance.execTime
+* ArmNN.relaxedFloat32toFloat16Performance.powerUsage
+* Armnn.operandTypeTensorFloat32Performance.execTime
+* Armnn.operandTypeTensorFloat32Performance.powerUsage
+* Armnn.operandTypeFloat32Performance.execTime
+* Armnn.operandTypeFloat32Performance.powerUsage
+* Armnn.operandTypeTensorFloat16Performance.execTime
+* Armnn.operandTypeTensorFloat16Performance.powerUsage
+* Armnn.operandTypeFloat16Performance.execTime
+* Armnn.operandTypeFloat16Performance.powerUsage
+* Armnn.operandTypeTensorQuant8AsymmPerformance.execTime
+* Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage
+* Armnn.operandTypeTensorQuant16SymmPerformance.execTime
+* Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage
+* Armnn.operandTypeTensorQuant8SymmPerformance.execTime
+* Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage
+* Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime
+* Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage
+* Armnn.operandTypeTensorInt32Performance.execTime
+* Armnn.operandTypeTensorInt32Performance.powerUsage
+* Armnn.operandTypeInt32Performance.execTime
+* Armnn.operandTypeInt32Performance.powerUsage
+
+#### HAL 1.3
+
+HAL 1.3 added support for the control flow operations If and While. Note that Arm NN does not currently support If or While; until it does, it is appropriate to ignore these system properties.
+
+* ArmNN.relaxedFloat32toFloat16Performance.execTime
+* ArmNN.relaxedFloat32toFloat16Performance.powerUsage
+* ArmNN.ifPerformance.execTime
+* ArmNN.ifPerformance.powerUsage
+* ArmNN.whilePerformance.execTime
+* ArmNN.whilePerformance.powerUsage
+* Armnn.operandTypeTensorFloat32Performance.execTime
+* Armnn.operandTypeTensorFloat32Performance.powerUsage
+* Armnn.operandTypeFloat32Performance.execTime
+* Armnn.operandTypeFloat32Performance.powerUsage
+* Armnn.operandTypeTensorFloat16Performance.execTime
+* Armnn.operandTypeTensorFloat16Performance.powerUsage
+* Armnn.operandTypeFloat16Performance.execTime
+* Armnn.operandTypeFloat16Performance.powerUsage
+* Armnn.operandTypeTensorQuant8AsymmPerformance.execTime
+* Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage
+* Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime
+* Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage
+* Armnn.operandTypeTensorQuant16SymmPerformance.execTime
+* Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage
+* Armnn.operandTypeTensorQuant8SymmPerformance.execTime
+* Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage
+* Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime
+* Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage
+* Armnn.operandTypeTensorInt32Performance.execTime
+* Armnn.operandTypeTensorInt32Performance.powerUsage
+* Armnn.operandTypeInt32Performance.execTime
+* Armnn.operandTypeInt32Performance.powerUsage
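+
+Once suitable figures have been measured, one possible way to make them persistent is to add property overrides to the same device makefile that carries the PRODUCT_PACKAGES entries described above. This is only a sketch; the exact property mechanism and any required sepolicy entries may vary between Android releases, and the values shown are placeholders:
+<pre>
+PRODUCT_PROPERTY_OVERRIDES += \
+    ArmNN.float32Performance.execTime=0.8 \
+    ArmNN.float32Performance.powerUsage=1.2
+</pre>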
diff --git a/docs/IntegratorGuide.md.license b/docs/IntegratorGuide.md.license
new file mode 100644
index 00000000..68a3f516
--- /dev/null
+++ b/docs/IntegratorGuide.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/setup.sh b/setup.sh
index b6508a4c..2887678a 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,5 +1,10 @@
#!/bin/bash
+#
+# Copyright © 2018,2020-2023 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
function AssertZeroExitCode {
EXITCODE=$?
if [ $EXITCODE -ne 0 ]; then
@@ -9,35 +14,44 @@ function AssertZeroExitCode {
fi
}
-if [ ! -d boost_1_64_0 ]; then
- echo "++ Downloading Boost"
-
- BOOST_PKG=boost_1_64_0.tar.gz
-
- # There is a problem with downloading boost from the external. Issue can be found here:https://github.com/boostorg/boost/issues/299.
- # Using a mirror link to download boost.
- curl -LOk https://dl.bintray.com/boostorg/release/1.64.0/source/boost_1_64_0.tar.gz
- # curl -LOk https://sourceforge.net/projects/boost/files/boost/1.64.0/boost_1_64_0.tar.gz # had switched to this mirror as we were not able to download boost from boostorg.
- AssertZeroExitCode "Downloading Boost failed"
-
- tar xzf $BOOST_PKG
- AssertZeroExitCode "Unpacking Boost failed"
-
- rm -rf $BOOST_PKG
-fi
-
+BUILD_DIR=build-x86_64
+FLATBUFFERS_DIR=$PWD/flatbuffers
+
+function BuildFlatbuffers {
+ pushd flatbuffers
+ rm -rf $BUILD_DIR
+ rm -f CMakeCache.txt
+ FLATBUFFERS_DIR=$PWD
+
+ mkdir -p $BUILD_DIR
+ cd $BUILD_DIR
+
+ echo "+++ Building Google Flatbufers"
+ CMD="cmake -DFLATBUFFERS_BUILD_FLATC=1 -DCMAKE_INSTALL_PREFIX:PATH=$FLATBUFFERS_DIR .."
+ # Force -fPIC to allow relocatable linking.
+ CXXFLAGS="-fPIC" $CMD
+ AssertZeroExitCode "cmake Google Flatbuffers failed. command was: ${CMD}"
+ make all install
+ AssertZeroExitCode "Building Google Flatbuffers failed"
+ mkdir -p $FLATBUFFERS_DIR/bin
+ cp -f flatc $FLATBUFFERS_DIR/bin
+ AssertZeroExitCode "Failed to copy the Flatbuffers Compiler"
+ popd
+}
-if [ ! -d v1.12.0 ]; then
- echo "++ Downloading FlatBuffers"
+if [ ! -d flatbuffers ]; then
+ echo "++ Downloading FlatBuffers v2.0.6"
- FLATBUFFERS_PKG=v1.12.0.tar.gz
+ FLATBUFFERS_PKG=v2.0.6.tar.gz
- curl -LOk https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz
+ curl -LOk https://github.com/google/flatbuffers/archive/${FLATBUFFERS_PKG}
AssertZeroExitCode "Downloading FlatBuffers failed"
-
- tar xzf $FLATBUFFERS_PKG
+ mkdir -p flatbuffers
+ tar xzf $FLATBUFFERS_PKG -C flatbuffers --strip-components 1
AssertZeroExitCode "Unpacking FlatBuffers failed"
+ BuildFlatbuffers
+
rm -rf $FLATBUFFERS_PKG
fi
@@ -69,3 +83,10 @@ scons os=android build=embed_only neon=0 opencl=1 embed_kernels=1 validation_tes
AssertZeroExitCode "Precompiling clframework failed for v8a."
popd
+if [ ! -d armnn/generated ]; then
+ mkdir -p armnn/generated
+fi
+
+echo "+++ Generating new ArmnnSchema_generated.h"
+$FLATBUFFERS_DIR/bin/flatc -o armnn/generated --cpp armnn/src/armnnSerializer/ArmnnSchema.fbs
+AssertZeroExitCode "Generating ArmnnSchema_generated.h failed."
diff --git a/test/1.0/Convolution2D.cpp b/test/1.0/Convolution2D.cpp
index 9a5d2393..2af09157 100644
--- a/test/1.0/Convolution2D.cpp
+++ b/test/1.0/Convolution2D.cpp
@@ -1,19 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "../DriverTestHelpers.hpp"
#include "../Convolution2D.hpp"
-#include "../../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
-BOOST_AUTO_TEST_SUITE(Convolution2DTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -29,14 +24,17 @@ void SetModelFp16Flag(V1_0::Model&, bool)
} // namespace driverTestHelpers
-BOOST_AUTO_TEST_CASE(ConvValidPadding_Hal_1_0)
+DOCTEST_TEST_SUITE("Convolution2DTests_1.0")
+{
+
+DOCTEST_TEST_CASE("ConvValidPadding_Hal_1_0")
{
PaddingTestImpl<hal_1_0::HalPolicy>(android::nn::kPaddingValid);
}
-BOOST_AUTO_TEST_CASE(ConvSamePadding_Hal_1_0)
+DOCTEST_TEST_CASE("ConvSamePadding_Hal_1_0")
{
PaddingTestImpl<hal_1_0::HalPolicy>(android::nn::kPaddingSame);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/1.0/FullyConnectedReshape.cpp b/test/1.0/FullyConnectedReshape.cpp
index 72c90ca5..e481f2d2 100644
--- a/test/1.0/FullyConnectedReshape.cpp
+++ b/test/1.0/FullyConnectedReshape.cpp
@@ -1,42 +1,39 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../DriverTestHelpers.hpp"
-#include "../../1.0/FullyConnected.hpp"
-#include <boost/test/unit_test.hpp>
-
-BOOST_AUTO_TEST_SUITE(FullyConnectedReshapeTests)
-
-BOOST_AUTO_TEST_CASE(TestFlattenFullyConnectedInput)
+DOCTEST_TEST_SUITE("FullyConnectedReshapeTests")
+{
+DOCTEST_TEST_CASE("TestFlattenFullyConnectedInput")
{
using armnn::TensorShape;
// Pass through 2d input
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({2,2048}), TensorShape({512, 2048})) ==
- TensorShape({2, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({2,2048}),
+ TensorShape({512, 2048})) == TensorShape({2, 2048}));
// Trivial flattening of batched channels
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({97,1,1,2048}), TensorShape({512, 2048})) ==
- TensorShape({97, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({97,1,1,2048}),
+ TensorShape({512, 2048})) == TensorShape({97, 2048}));
// Flatten single batch of rows
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({1,97,1,2048}), TensorShape({512, 2048})) ==
- TensorShape({97, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({1,97,1,2048}),
+ TensorShape({512, 2048})) == TensorShape({97, 2048}));
// Flatten single batch of columns
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({1,1,97,2048}), TensorShape({512, 2048})) ==
- TensorShape({97, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({1,1,97,2048}),
+ TensorShape({512, 2048})) == TensorShape({97, 2048}));
// Move batches into input dimension
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({50,1,1,10}), TensorShape({512, 20})) ==
- TensorShape({25, 20}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({50,1,1,10}),
+ TensorShape({512, 20})) == TensorShape({25, 20}));
// Flatten single batch of 3D data (e.g. convolution output)
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({1,16,16,10}), TensorShape({512, 2560})) ==
- TensorShape({1, 2560}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({1,16,16,10}),
+ TensorShape({512, 2560})) == TensorShape({1, 2560}));
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/1.0/Lstm.cpp b/test/1.0/Lstm.cpp
index 5f0a209d..6b3e7042 100644
--- a/test/1.0/Lstm.cpp
+++ b/test/1.0/Lstm.cpp
@@ -1,34 +1,60 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Lstm.hpp"
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(LstmTests)
-
using namespace armnn_driver;
-BOOST_DATA_TEST_CASE(LstmNoCifgNoPeepholeNoProjectionTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("LstmTests_1.0_CpuRef")
{
- LstmNoCifgNoPeepholeNoProjection<hal_1_0::HalPolicy>(sample);
-}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjection<hal_1_0::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.0_armnn::Compute::CpuRef")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.0_CpuRef")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.0_CpuRef")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.0_CpuRef")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjection<hal_1_0::HalPolicy>(sample);
}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2Test, COMPUTE_DEVICES)
+#if defined(ARMCOMPUTECL_ENABLED)
+DOCTEST_TEST_SUITE("LstmTests_1.0_GpuAcc")
{
- LstmCifgPeepholeNoProjectionBatch2<hal_1_0::HalPolicy>(sample);
-}
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.0_GpuAcc")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.0_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.0_GpuAcc")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.0_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+}
+#endif
diff --git a/test/1.1/Convolution2D.cpp b/test/1.1/Convolution2D.cpp
index 32d5018c..4601f760 100644
--- a/test/1.1/Convolution2D.cpp
+++ b/test/1.1/Convolution2D.cpp
@@ -1,19 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "../DriverTestHelpers.hpp"
#include "../Convolution2D.hpp"
-#include "../../1.1/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
-BOOST_AUTO_TEST_SUITE(Convolution2DTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -29,24 +24,28 @@ void SetModelFp16Flag(V1_1::Model& model, bool fp16Enabled)
} // namespace driverTestHelpers
-BOOST_AUTO_TEST_CASE(ConvValidPadding_Hal_1_1)
+
+DOCTEST_TEST_SUITE("Convolution2DTests_1.1")
+{
+
+DOCTEST_TEST_CASE("ConvValidPadding_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingValid);
}
-BOOST_AUTO_TEST_CASE(ConvSamePadding_Hal_1_1)
+DOCTEST_TEST_CASE("ConvSamePadding_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingSame);
}
-BOOST_AUTO_TEST_CASE(ConvValidPaddingFp16Flag_Hal_1_1)
+DOCTEST_TEST_CASE("ConvValidPaddingFp16Flag_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingValid, true);
}
-BOOST_AUTO_TEST_CASE(ConvSamePaddingFp16Flag_Hal_1_1)
+DOCTEST_TEST_CASE("ConvSamePaddingFp16Flag_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingSame, true);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/1.1/Lstm.cpp b/test/1.1/Lstm.cpp
index 703597e5..cbdf6b14 100644
--- a/test/1.1/Lstm.cpp
+++ b/test/1.1/Lstm.cpp
@@ -1,34 +1,60 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Lstm.hpp"
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(LstmTests)
-
using namespace armnn_driver;
-BOOST_DATA_TEST_CASE(LstmNoCifgNoPeepholeNoProjectionTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("LstmTests_1.1_CpuRef")
{
- LstmNoCifgNoPeepholeNoProjection<hal_1_1::HalPolicy>(sample);
-}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjection<hal_1_1::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.1_armnn::Compute::CpuRef")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.1_CpuRef")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.1_CpuRef")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.1_CpuRef")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjection<hal_1_1::HalPolicy>(sample);
}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2Test, COMPUTE_DEVICES)
+#if defined(ARMCOMPUTECL_ENABLED)
+DOCTEST_TEST_SUITE("LstmTests_1.1_GpuAcc")
{
- LstmCifgPeepholeNoProjectionBatch2<hal_1_1::HalPolicy>(sample);
-}
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.1_GpuAcc")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.1_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.1_GpuAcc")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.1_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+}
+#endif
diff --git a/test/1.1/Mean.cpp b/test/1.1/Mean.cpp
index c9a5a6d3..70bdc3d3 100644
--- a/test/1.1/Mean.cpp
+++ b/test/1.1/Mean.cpp
@@ -1,19 +1,15 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../DriverTestHelpers.hpp"
#include "../TestTensor.hpp"
-#include "../1.1/HalPolicy.hpp"
-
-#include <boost/test/data/test_case.hpp>
+#include <1.1/HalPolicy.hpp>
#include <array>
-BOOST_AUTO_TEST_SUITE(MeanTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -24,12 +20,6 @@ using RequestArgument = V1_0::RequestArgument;
namespace
{
-#ifndef ARMCOMPUTECL_ENABLED
- static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
- static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
void MeanTestImpl(const TestTensor& input,
const hidl_vec<uint32_t>& axisDimensions,
const int32_t* axisValues,
@@ -94,64 +84,177 @@ void MeanTestImpl(const TestTensor& input,
if (preparedModel.get() != nullptr)
{
V1_0::ErrorStatus execStatus = Execute(preparedModel, request);
- BOOST_TEST(execStatus == V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK((int)execStatus == (int)V1_0::ErrorStatus::NONE);
}
const float* expectedOutputData = expectedOutput.GetData();
for (unsigned int i = 0; i < expectedOutput.GetNumElements(); i++)
{
- BOOST_TEST(outputData[i] == expectedOutputData[i]);
+ DOCTEST_CHECK(outputData[i] == expectedOutputData[i]);
}
}
} // anonymous namespace
-BOOST_DATA_TEST_CASE(MeanNoKeepDimsTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("MeanTests_CpuRef")
{
- TestTensor input{ armnn::TensorShape{ 4, 3, 2 }, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
- 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
- 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
- hidl_vec<uint32_t> axisDimensions = { 2 };
- int32_t axisValues[] = { 0, 1 };
- int32_t keepDims = 0;
- TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
-
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, sample);
-}
-BOOST_DATA_TEST_CASE(MeanKeepDimsTest, COMPUTE_DEVICES)
-{
- TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
- hidl_vec<uint32_t> axisDimensions = { 1 };
- int32_t axisValues[] = { 2 };
- int32_t keepDims = 1;
- TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+ DOCTEST_TEST_CASE("MeanNoKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("MeanKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledNoKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+ }
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, sample);
}
-BOOST_DATA_TEST_CASE(MeanFp16NoKeepDimsTest, COMPUTE_DEVICES)
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("MeanTests_CpuAcc")
{
- TestTensor input{ armnn::TensorShape{ 4, 3, 2 }, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
- 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
- 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
- hidl_vec<uint32_t> axisDimensions = { 2 };
- int32_t axisValues[] = { 0, 1 };
- int32_t keepDims = 0;
- TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
-
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, sample);
+ DOCTEST_TEST_CASE("MeanNoKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledNoKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
}
-BOOST_DATA_TEST_CASE(MeanFp16KeepDimsTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("MeanTests_GpuAcc")
{
- TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
- hidl_vec<uint32_t> axisDimensions = { 1 };
- int32_t axisValues[] = { 2 };
- int32_t keepDims = 1;
- TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+ DOCTEST_TEST_CASE("MeanNoKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::GpuAcc);
+ }
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, sample);
-}
+ DOCTEST_TEST_CASE("MeanKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledNoKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("MeanFp16EnabledKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
+}
+#endif
diff --git a/test/1.1/Transpose.cpp b/test/1.1/Transpose.cpp
index 206f9b98..5499e0d6 100644
--- a/test/1.1/Transpose.cpp
+++ b/test/1.1/Transpose.cpp
@@ -1,24 +1,18 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "OperationsUtils.h"
#include "../DriverTestHelpers.hpp"
#include "../TestTensor.hpp"
-
-#include "../1.1/HalPolicy.hpp"
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
+#include <1.1/HalPolicy.hpp>
#include <log/log.h>
+#include <OperationsUtils.h>
#include <array>
#include <cmath>
-BOOST_AUTO_TEST_SUITE(TransposeTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -29,12 +23,6 @@ using RequestArgument = V1_0::RequestArgument;
namespace
{
-#ifndef ARMCOMPUTECL_ENABLED
- static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
- static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
void TransposeTestImpl(const TestTensor & inputs, int32_t perm[],
const TestTensor & expectedOutputTensor, armnn::Compute computeDevice)
{
@@ -98,38 +86,100 @@ void TransposeTestImpl(const TestTensor & inputs, int32_t perm[],
const float * expectedOutput = expectedOutputTensor.GetData();
for (unsigned int i = 0; i < expectedOutputTensor.GetNumElements(); ++i)
{
- BOOST_TEST(outdata[i] == expectedOutput[i]);
+ DOCTEST_CHECK(outdata[i] == expectedOutput[i]);
}
}
} // namespace
-BOOST_DATA_TEST_CASE(Transpose , COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("TransposeTests_CpuRef")
{
- int32_t perm[] = {2, 3, 1, 0};
- TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
- TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
+ DOCTEST_TEST_CASE("Transpose_CpuRef")
+ {
+ int32_t perm[] = {2, 3, 1, 0};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("TransposeNHWCToArmNN_CpuRef")
+ {
+ int32_t perm[] = {0, 3, 1, 2};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
+ TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
- TransposeTestImpl(input, perm, expected, sample);
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuRef);
+ }
+ DOCTEST_TEST_CASE("TransposeArmNNToNHWC_CpuRef")
+ {
+ int32_t perm[] = {0, 2, 3, 1};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuRef);
+ }
}
-BOOST_DATA_TEST_CASE(TransposeNHWCToArmNN , COMPUTE_DEVICES)
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("TransposeTests_CpuAcc")
{
- int32_t perm[] = {0, 3, 1, 2};
- TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
- TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
+ DOCTEST_TEST_CASE("Transpose_CpuAcc")
+ {
+ int32_t perm[] = {2, 3, 1, 0};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("TransposeNHWCToArmNN_CpuAcc")
+ {
+ int32_t perm[] = {0, 3, 1, 2};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
+ TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("TransposeArmNNToNHWC_CpuAcc")
+ {
+ int32_t perm[] = {0, 2, 3, 1};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
- TransposeTestImpl(input, perm, expected, sample);
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuAcc);
+ }
}
-BOOST_DATA_TEST_CASE(TransposeArmNNToNHWC , COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("TransposeTests_GpuAcc")
{
- int32_t perm[] = {0, 2, 3, 1};
- TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
- TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
+ DOCTEST_TEST_CASE("Transpose_GpuAcc")
+ {
+ int32_t perm[] = {2, 3, 1, 0};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
- TransposeTestImpl(input, perm, expected, sample);
-}
+ TransposeTestImpl(input, perm, expected, armnn::Compute::GpuAcc);
+ }
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("TransposeNHWCToArmNN_GpuAcc")
+ {
+ int32_t perm[] = {0, 3, 1, 2};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
+ TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("TransposeArmNNToNHWC_GpuAcc")
+ {
+ int32_t perm[] = {0, 2, 3, 1};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::GpuAcc);
+ }
+}
+#endif
diff --git a/test/1.2/Capabilities.cpp b/test/1.2/Capabilities.cpp
index 15ecf968..41d5ee53 100644
--- a/test/1.2/Capabilities.cpp
+++ b/test/1.2/Capabilities.cpp
@@ -1,15 +1,12 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "../../1.2/ArmnnDriverImpl.hpp"
-
+#include "../DriverTestHelpers.hpp"
#include "Utils.h"
-#include <armnn/utility/Assert.hpp>
-
-#include <boost/test/unit_test.hpp>
+#include <1.2/ArmnnDriverImpl.hpp>
#include <sys/system_properties.h>
@@ -62,15 +59,14 @@ void CheckOperandType(const V1_2::Capabilities& capabilities, V1_2::OperandType
{
using namespace armnn_driver::hal_1_2;
V1_0::PerformanceInfo perfInfo = android::nn::lookup(capabilities.operandPerformance, type);
- ARMNN_ASSERT(perfInfo.execTime == execTime);
- ARMNN_ASSERT(perfInfo.powerUsage == powerUsage);
+ DOCTEST_CHECK(perfInfo.execTime == execTime);
+ DOCTEST_CHECK(perfInfo.powerUsage == powerUsage);
}
-BOOST_FIXTURE_TEST_SUITE(CapabilitiesTests, CapabilitiesFixture)
-
-BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesWithRuntime)
+DOCTEST_TEST_SUITE("CapabilitiesTests")
+{
+DOCTEST_TEST_CASE_FIXTURE(CapabilitiesFixture, "PerformanceCapabilitiesWithRuntime")
{
- using namespace armnn_driver::hal_1_2;
using namespace android::nn;
auto getCapabilitiesFn = [&](V1_0::ErrorStatus error, const V1_2::Capabilities& capabilities)
@@ -94,7 +90,8 @@ BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesWithRuntime)
CheckOperandType(capabilities, V1_2::OperandType::OEM, FLT_MAX, FLT_MAX);
CheckOperandType(capabilities, V1_2::OperandType::TENSOR_OEM_BYTE, FLT_MAX, FLT_MAX);
- ARMNN_ASSERT(error == V1_0::ErrorStatus::NONE);
+ bool result = (error == V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(result);
};
__system_property_set("Armnn.operandTypeTensorFloat32Performance.execTime", "2.0f");
@@ -121,12 +118,11 @@ BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesWithRuntime)
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
+ armnn_driver::hal_1_2::ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
}
-BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesUndefined)
+DOCTEST_TEST_CASE_FIXTURE(CapabilitiesFixture, "PerformanceCapabilitiesUndefined")
{
- using namespace armnn_driver::hal_1_2;
using namespace android::nn;
float defaultValue = .1f;
@@ -155,13 +151,14 @@ BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesUndefined)
CheckOperandType(capabilities, V1_2::OperandType::OEM, FLT_MAX, FLT_MAX);
CheckOperandType(capabilities, V1_2::OperandType::TENSOR_OEM_BYTE, FLT_MAX, FLT_MAX);
- ARMNN_ASSERT(error == V1_0::ErrorStatus::NONE);
+ bool result = (error == V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(result);
};
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
+ armnn_driver::hal_1_2::ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
}
-BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
+} \ No newline at end of file
diff --git a/test/1.2/Dilation.cpp b/test/1.2/Dilation.cpp
index 1a7ba4b4..c9182a7c 100644
--- a/test/1.2/Dilation.cpp
+++ b/test/1.2/Dilation.cpp
@@ -1,17 +1,16 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Dilation.hpp"
-#include "../../1.2/HalPolicy.hpp"
+#include <1.2/HalPolicy.hpp>
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(DilationTests)
+DOCTEST_TEST_SUITE("DilationTests")
+{
-BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("ConvolutionExplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -21,7 +20,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingDilation)
+DOCTEST_TEST_CASE("ConvolutionExplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -31,7 +30,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("ConvolutionImplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -41,7 +40,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingDilation)
+DOCTEST_TEST_CASE("ConvolutionImplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -51,7 +50,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionExplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -61,7 +60,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionExplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -71,7 +70,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionImplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -81,7 +80,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionImplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -91,4 +90,4 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
+} \ No newline at end of file
diff --git a/test/1.2/Lstm.cpp b/test/1.2/Lstm.cpp
index 03f7fe47..7a2b3942 100644
--- a/test/1.2/Lstm.cpp
+++ b/test/1.2/Lstm.cpp
@@ -1,51 +1,72 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Lstm.hpp"
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(LstmTests)
-
using namespace armnn_driver;
-BOOST_DATA_TEST_CASE(LstmNoCifgNoPeepholeNoProjectionTest, COMPUTE_DEVICES)
+#if defined(ARMNNREF_ENABLED)
+DOCTEST_TEST_SUITE("LstmTests_1.2_CpuRef")
{
- LstmNoCifgNoPeepholeNoProjection<hal_1_2::HalPolicy>(sample);
-}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjection<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.2_armnn::Compute::CpuRef")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjection<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.2_CpuRef")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2Test, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjectionBatch2<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.2_CpuRef")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionNoClippingLayerNormTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjectionNoClippingLayerNorm<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.2_CpuRef")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("QuantizedLstmTest_1.2_CpuRef")
+ {
+ QuantizedLstm<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeProjectionNoClippingLayerNormTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeProjectionNoClippingLayerNorm<hal_1_2::HalPolicy>(sample);
}
+#endif
#if defined(ARMCOMPUTECL_ENABLED)
-BOOST_DATA_TEST_CASE(QuantizedLstmTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("LstmTests_1.2_GpuAcc")
{
- QuantizedLstm<hal_1_2::HalPolicy>(sample);
+
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.2_GpuAcc")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.2_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.2_GpuAcc")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.2_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("QuantizedLstmTest_1.2_GpuAcc")
+ {
+ QuantizedLstm<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
}
#endif
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/1.2/Mean.cpp b/test/1.2/Mean.cpp
new file mode 100644
index 00000000..a2a8b7a1
--- /dev/null
+++ b/test/1.2/Mean.cpp
@@ -0,0 +1,204 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../DriverTestHelpers.hpp"
+#include "../TestHalfTensor.hpp"
+
+#include <1.2/HalPolicy.hpp>
+
+#include <array>
+
+using Half = half_float::half;
+
+using namespace android::hardware;
+using namespace driverTestHelpers;
+using namespace armnn_driver;
+
+using HalPolicy = hal_1_2::HalPolicy;
+using RequestArgument = V1_0::RequestArgument;
+
+namespace
+{
+
+void MeanTestImpl(const TestHalfTensor& input,
+ const hidl_vec<uint32_t>& axisDimensions,
+ const int32_t* axisValues,
+ int32_t keepDims,
+ const TestHalfTensor& expectedOutput,
+ bool fp16Enabled,
+ armnn::Compute computeDevice)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(computeDevice, fp16Enabled));
+
+ HalPolicy::Model model = {};
+
+ AddInputOperand<HalPolicy>(model, input.GetDimensions(), V1_2::OperandType::TENSOR_FLOAT16);
+
+ AddTensorOperand<HalPolicy>(model,
+ axisDimensions,
+ const_cast<int32_t*>(axisValues),
+ HalPolicy::OperandType::TENSOR_INT32);
+
+ AddIntOperand<HalPolicy>(model, keepDims);
+
+ AddOutputOperand<HalPolicy>(model, expectedOutput.GetDimensions(), V1_2::OperandType::TENSOR_FLOAT16);
+
+ model.operations.resize(1);
+ model.operations[0].type = HalPolicy::OperationType::MEAN;
+ model.operations[0].inputs = hidl_vec<uint32_t>{ 0, 1, 2 };
+ model.operations[0].outputs = hidl_vec<uint32_t>{ 3 };
+ model.relaxComputationFloat32toFloat16 = fp16Enabled;
+
+ //android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
+ android::sp<V1_2::IPreparedModel> preparedModel = PrepareModel_1_2(model, *driver);
+
+ // The request's memory pools will follow the same order as the inputs
+ V1_0::DataLocation inLoc = {};
+ inLoc.poolIndex = 0;
+ inLoc.offset = 0;
+ inLoc.length = input.GetNumElements() * sizeof(Half);
+ RequestArgument inArg = {};
+ inArg.location = inLoc;
+ inArg.dimensions = input.GetDimensions();
+
+ // An additional memory pool is needed for the output
+ V1_0::DataLocation outLoc = {};
+ outLoc.poolIndex = 1;
+ outLoc.offset = 0;
+ outLoc.length = expectedOutput.GetNumElements() * sizeof(Half);
+ RequestArgument outArg = {};
+ outArg.location = outLoc;
+ outArg.dimensions = expectedOutput.GetDimensions();
+
+ // Make the request based on the arguments
+ V1_0::Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{ inArg };
+ request.outputs = hidl_vec<RequestArgument>{ outArg };
+
+ // Set the input data
+ AddPoolAndSetData(input.GetNumElements(), request, input.GetData());
+
+ // Add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData<Half>(expectedOutput.GetNumElements(), request);
+ const Half* outputData = static_cast<const Half*>(static_cast<void*>(outMemory->getPointer()));
+
+ if (preparedModel.get() != nullptr)
+ {
+ V1_0::ErrorStatus execStatus = Execute(preparedModel, request);
+ DOCTEST_CHECK((int)execStatus == (int)V1_0::ErrorStatus::NONE);
+ }
+
+ const Half* expectedOutputData = expectedOutput.GetData();
+ for (unsigned int i = 0; i < expectedOutput.GetNumElements(); i++)
+ {
+ DOCTEST_CHECK(outputData[i] == expectedOutputData[i]);
+ }
+}
+
+} // anonymous namespace
+
+DOCTEST_TEST_SUITE("MeanTests_1.2_CpuRef")
+{
+
+DOCTEST_TEST_CASE("MeanFp16NoKeepDimsTest_CpuRef")
+{
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0_h, 2.0_h, 3.0_h, 4.0_h, 5.0_h, 6.0_h, 7.0_h, 8.0_h, 9.0_h, 10.0_h,
+ 11.0_h, 12.0_h, 13.0_h, 14.0_h, 15.0_h, 16.0_h, 17.0_h, 18.0_h, 19.0_h,
+ 20.0_h, 21.0_h, 22.0_h, 23.0_h, 24.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0_h, 13.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MeanFp16KeepDimsTest_CpuRef")
+{
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0_h, 1.0_h, 2.0_h, 2.0_h, 3.0_h, 3.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0_h, 2.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+}
+
+}
+
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("MeanTests_1.2_CpuAcc")
+{
+ DOCTEST_TEST_CASE("MeanFp16NoKeepDimsTest_CpuAcc")
+ {
+ using namespace half_float::literal;
+
+ std::vector<Half> in = { 1.0_h, 2.0_h, 3.0_h, 4.0_h, 5.0_h, 6.0_h, 7.0_h, 8.0_h, 9.0_h, 10.0_h,
+ 11.0_h, 12.0_h, 13.0_h, 14.0_h, 15.0_h, 16.0_h, 17.0_h, 18.0_h, 19.0_h,
+ 20.0_h, 21.0_h, 22.0_h, 23.0_h, 24.0_h };
+ TestHalfTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ in};
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ std::vector<Half> out = { 12.0_h, 13.0_h };
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 2 }, out };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16KeepDimsTest_CpuAcc")
+ {
+ using namespace half_float::literal;
+
+ std::vector<Half> in = { 1.0_h, 1.0_h, 2.0_h, 2.0_h, 3.0_h, 3.0_h };
+ TestHalfTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, in };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ std::vector<Half> out = { 2.0_h, 2.0_h };
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, out };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
+}
+
+DOCTEST_TEST_SUITE("MeanTests_1.2_GpuAcc")
+{
+ DOCTEST_TEST_CASE("MeanFp16NoKeepDimsTest_GpuAcc")
+ {
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0_h, 2.0_h, 3.0_h, 4.0_h, 5.0_h, 6.0_h, 7.0_h, 8.0_h, 9.0_h, 10.0_h,
+ 11.0_h, 12.0_h, 13.0_h, 14.0_h, 15.0_h, 16.0_h, 17.0_h, 18.0_h, 19.0_h,
+ 20.0_h, 21.0_h, 22.0_h, 23.0_h, 24.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0_h, 13.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16KeepDimsTest_GpuAcc")
+ {
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0_h, 1.0_h, 2.0_h, 2.0_h, 3.0_h, 3.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0_h, 2.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
+}
+#endif
diff --git a/test/1.2/UnidirectionalSequenceLstm.cpp b/test/1.2/UnidirectionalSequenceLstm.cpp
new file mode 100644
index 00000000..fd35aa41
--- /dev/null
+++ b/test/1.2/UnidirectionalSequenceLstm.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../UnidirectionalSequenceLstm.hpp"
+
+using namespace armnn_driver;
+
+DOCTEST_TEST_SUITE("UnidirectionalSequenceLstmTests_1.2_CpuRef")
+{
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerFloat32Test_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerFloat32TestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerFloat32TimeMajorTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerFloat32TimeMajorTestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTestImpl<hal_1_2::HalPolicy>
+ (armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<hal_1_2::HalPolicy>
+ (armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+} \ No newline at end of file
diff --git a/test/1.3/QLstm.cpp b/test/1.3/QLstm.cpp
index 27e52a60..08466195 100644
--- a/test/1.3/QLstm.cpp
+++ b/test/1.3/QLstm.cpp
@@ -1,23 +1,14 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../DriverTestHelpers.hpp"
-#include "../TestTensor.hpp"
-#include "../1.3/HalPolicy.hpp"
-
-#include <armnn/utility/IgnoreUnused.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
-#include <boost/math/special_functions/relative_difference.hpp>
+#include <1.3/HalPolicy.hpp>
#include <array>
-BOOST_AUTO_TEST_SUITE(QLSTMTests)
-
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
@@ -26,6 +17,8 @@ using namespace android::hardware;
using HalPolicy = hal_1_3::HalPolicy;
+static const float TOLERANCE = 1.0f;
+
namespace
{
@@ -42,26 +35,6 @@ RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int
return inputRequestArgument;
}
-// Returns true if the relative difference between two float values is less than the tolerance value given.
-// This is used because the floating point comparison tolerance (set on each BOOST_AUTO_TEST_CASE) does not work!
-bool TolerantCompareEqual(float a, float b, float tolerance = 1.0f)
-{
- float rd;
- if (a == 0.0f)
- {
- rd = fabs(b);
- }
- else if (b == 0.0f)
- {
- rd = fabs(a);
- }
- else
- {
- rd = boost::math::relative_difference(a, b);
- }
- return rd < tolerance;
-}
-
// Helper function to create an OperandLifeTime::NO_VALUE for testing.
// To be used on optional input operands that have no values - these are valid and should be tested.
HalPolicy::OperandLifeTime CreateNoValueLifeTime(const hidl_vec<uint32_t>& dimensions)
@@ -85,12 +58,6 @@ void ExecuteModel(const armnn_driver::hal_1_3::HalPolicy::Model& model,
}
}
-#ifndef ARMCOMPUTECL_ENABLED
-static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
-static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::CpuAcc }};
-#endif
-
// Add our own tests here since we skip the qlstm tests which Google supplies (because of non-const weights)
void QLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
const std::vector<int8_t>& inputValue,
@@ -527,8 +494,9 @@ void QLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
// check the results
for (size_t i = 0; i < outputStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputStateOutValue[i], outputStateOutData[i]),
- "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != " << outputStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(outputStateOutValue[i] == doctest::Approx( outputStateOutData[i] ).epsilon(TOLERANCE),
+ "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != "
+ << outputStateOutData[i]);
}
// CELL STATE OUTPUT Does not match currently: IVGCVSW-4860 Verify remaining VTS tests (2) for QLSTM
@@ -541,8 +509,8 @@ void QLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
for (size_t i = 0; i < outputValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i]),
- "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx( outputData[i] ).epsilon(TOLERANCE),
+ "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
}
}
@@ -1028,19 +996,38 @@ void DynamicOutputQLstmWithNoProjection(armnn::Compute compute)
} // anonymous namespace
// Support is not added yet
-//BOOST_DATA_TEST_CASE(QLSTMWithProjectionTest, COMPUTE_DEVICES)
+//TEST_CASE(QLSTMWithProjectionTest, COMPUTE_DEVICES)
//{
// QLstmWithProjection(sample);
//}
-BOOST_DATA_TEST_CASE(QLSTMWithNoProjectionTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("QLSTMTests_CpuRef")
{
- QLstmWithNoProjection(sample);
-}
-BOOST_DATA_TEST_CASE(DynamicOutputQLSTMWithNoProjectionTest, COMPUTE_DEVICES)
-{
- DynamicOutputQLstmWithNoProjection(sample);
+ DOCTEST_TEST_CASE("QLSTMWithNoProjectionTest_CpuRef")
+ {
+ QLstmWithNoProjection(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("DynamicOutputQLstmWithNoProjection_CpuRef")
+ {
+ DynamicOutputQLstmWithNoProjection(armnn::Compute::CpuRef);
+ }
+
}
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("QLSTMTests_CpuAcc")
+{
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+ DOCTEST_TEST_CASE("QLSTMWithNoProjectionTest_CpuAcc")
+ {
+ QLstmWithNoProjection(armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("DynamicOutputQLstmWithNoProjection_CpuAcc")
+ {
+ DynamicOutputQLstmWithNoProjection(armnn::Compute::CpuAcc);
+ }
+
+}
+#endif
diff --git a/test/1.3/QosTests.cpp b/test/1.3/QosTests.cpp
index 9fd66880..cd8ac33c 100644
--- a/test/1.3/QosTests.cpp
+++ b/test/1.3/QosTests.cpp
@@ -4,18 +4,11 @@
//
#include "../DriverTestHelpers.hpp"
-#include "../TestTensor.hpp"
-#include "../1.3/HalPolicy.hpp"
-
-#include <armnn/utility/IgnoreUnused.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
-
-
-BOOST_AUTO_TEST_SUITE(QosTests)
+#include <1.3/HalPolicy.hpp>
+DOCTEST_TEST_SUITE("QosTests")
+{
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
@@ -40,13 +33,7 @@ void ExecuteModel(const armnn_driver::hal_1_3::HalPolicy::Model& model,
}
}
-#ifndef ARMCOMPUTECL_ENABLED
-static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
-static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::CpuAcc }};
-#endif
-
-BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
+DOCTEST_TEST_CASE("ConcurrentExecuteWithQosPriority")
{
ALOGI("ConcurrentExecuteWithQOSPriority: entry");
@@ -102,7 +89,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
preparedModelsSize++;
}
- BOOST_TEST(maxRequests == preparedModelsSize);
+ DOCTEST_CHECK(maxRequests == preparedModelsSize);
// construct the request data
V1_0::DataLocation inloc = {};
@@ -162,7 +149,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
ALOGI("ConcurrentExecuteWithQOSPriority: waiting for callbacks");
for (size_t i = 0; i < maxRequests; ++i)
{
- ARMNN_ASSERT(cb[i]);
+ DOCTEST_CHECK(cb[i]);
cb[i]->wait();
}
@@ -172,15 +159,15 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
{
if (i < 15)
{
- BOOST_TEST(outdata[i][0] == 152);
+ DOCTEST_CHECK(outdata[i][0] == 152);
}
else if (i < 30)
{
- BOOST_TEST(outdata[i][0] == 141);
+ DOCTEST_CHECK(outdata[i][0] == 141);
}
else
{
- BOOST_TEST(outdata[i][0] == 159);
+ DOCTEST_CHECK(outdata[i][0] == 159);
}
}
@@ -189,4 +176,4 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
} // anonymous namespace
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/test/Android.mk b/test/Android.mk
index 1da26e40..8621182c 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 ARM Ltd. All rights reserved.
+# Copyright © 2017, 2022 ARM Ltd. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -9,6 +9,7 @@ LOCAL_PATH := $(call my-dir)
#
OPENCL_HEADER_PATH := $(LOCAL_PATH)/../../mali/product/khronos/original
ARMNN_HEADER_PATH := $(LOCAL_PATH)/../armnn/include
+ARMNN_PROFILING_PATH := $(LOCAL_PATH)/../armnn/profiling
ARMNN_THIRD_PARTY_PATH := $(LOCAL_PATH)/../armnn/third-party
ARMNN_UTILS_HEADER_PATH := $(LOCAL_PATH)/../armnn/src/armnnUtils
ARMNN_DRIVER_HEADER_PATH := $(LOCAL_PATH)/..
@@ -39,6 +40,7 @@ LOCAL_C_INCLUDES := \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH) \
$(ARMNN_HEADER_PATH) \
+ $(ARMNN_PROFILING_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
$(ARMNN_DRIVER_HEADER_PATH)
@@ -50,6 +52,22 @@ LOCAL_CFLAGS := \
-O0 \
-UNDEBUG
+# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTECL_ENABLED
+endif # ARMNN_COMPUTE_CL_ENABLED == 1
+# The variable to enable/disable the NEON backend (ARMNN_COMPUTE_NEON_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTENEON_ENABLED
+endif # ARMNN_COMPUTE_NEON_ENABLED == 1
+# The variable to enable/disable the REFERENCE backend (ARMNN_REF_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_REF_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMNNREF_ENABLED
+endif # ARMNN_REF_ENABLED == 1
+
# Required to build with the changes made to the Android ML framework specific to Android R
ifeq ($(ANDROID_R),1)
LOCAL_CFLAGS+= \
@@ -61,11 +79,6 @@ LOCAL_CFLAGS+= \
-DARMNN_ANDROID_S
endif # S or later
-ifeq ($(Q_OR_LATER),1)
-LOCAL_CFLAGS += \
- -DBOOST_NO_AUTO_PTR
-endif # PLATFORM_VERSION == Q or later
-
LOCAL_SRC_FILES := \
1.0/Convolution2D.cpp \
1.0/FullyConnectedReshape.cpp \
@@ -78,13 +91,14 @@ LOCAL_SRC_FILES := \
DriverTestHelpers.cpp \
SystemProperties.cpp \
Concat.cpp \
- TestTensor.cpp
+ TestTensor.cpp \
+ TestHalfTensor.cpp
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
- libboost_unit_test_framework \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.0
@@ -155,6 +169,7 @@ LOCAL_C_INCLUDES := \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH) \
$(ARMNN_HEADER_PATH) \
+ $(ARMNN_PROFILING_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
$(ARMNN_DRIVER_HEADER_PATH)
@@ -167,6 +182,22 @@ LOCAL_CFLAGS := \
-UNDEBUG \
-DARMNN_ANDROID_NN_V1_1
+# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTECL_ENABLED
+endif # ARMNN_COMPUTE_CL_ENABLED == 1
+# The variable to enable/disable the NEON backend (ARMNN_COMPUTE_NEON_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTENEON_ENABLED
+endif # ARMNN_COMPUTE_NEON_ENABLED == 1
+# The variable to enable/disable the REFERENCE backend (ARMNN_REF_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_REF_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMNNREF_ENABLED
+endif # ARMNN_REF_ENABLED == 1
+
# Required to build with the changes made to the Android ML framework specific to Android R
ifeq ($(ANDROID_R),1)
LOCAL_CFLAGS+= \
@@ -178,11 +209,6 @@ LOCAL_CFLAGS+= \
-DARMNN_ANDROID_S
endif # S or later
-ifeq ($(Q_OR_LATER),1)
-LOCAL_CFLAGS += \
- -DBOOST_NO_AUTO_PTR
-endif # PLATFORM_VERSION == Q or later
-
LOCAL_SRC_FILES := \
1.0/Convolution2D.cpp \
1.1/Convolution2D.cpp \
@@ -198,13 +224,14 @@ LOCAL_SRC_FILES := \
DriverTestHelpers.cpp \
SystemProperties.cpp \
Concat.cpp \
- TestTensor.cpp
+ TestTensor.cpp \
+ TestHalfTensor.cpp
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
- libboost_unit_test_framework \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.1
@@ -267,6 +294,7 @@ LOCAL_C_INCLUDES := \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH) \
$(ARMNN_HEADER_PATH) \
+ $(ARMNN_PROFILING_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
$(ARMNN_DRIVER_HEADER_PATH)
@@ -277,9 +305,24 @@ LOCAL_CFLAGS := \
-Werror \
-O0 \
-UNDEBUG \
- -DBOOST_NO_AUTO_PTR \
-DARMNN_ANDROID_NN_V1_2
+# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTECL_ENABLED
+endif # ARMNN_COMPUTE_CL_ENABLED == 1
+# The variable to enable/disable the NEON backend (ARMNN_COMPUTE_NEON_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTENEON_ENABLED
+endif # ARMNN_COMPUTE_NEON_ENABLED == 1
+# The variable to enable/disable the REFERENCE backend (ARMNN_REF_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_REF_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMNNREF_ENABLED
+endif # ARMNN_REF_ENABLED == 1
+
# Required to build with the changes made to the Android ML framework specific to Android R
ifeq ($(ANDROID_R),1)
LOCAL_CFLAGS+= \
@@ -298,9 +341,11 @@ LOCAL_SRC_FILES := \
1.1/Transpose.cpp \
1.2/Dilation.cpp \
1.2/Capabilities.cpp \
+ 1.2/Mean.cpp \
1.0/Lstm.cpp \
1.1/Lstm.cpp \
1.2/Lstm.cpp \
+ 1.2/UnidirectionalSequenceLstm.cpp \
Tests.cpp \
UtilsTests.cpp \
Concurrent.cpp \
@@ -309,13 +354,14 @@ LOCAL_SRC_FILES := \
DriverTestHelpers.cpp \
SystemProperties.cpp \
Concat.cpp \
- TestTensor.cpp
+ TestTensor.cpp \
+ TestHalfTensor.cpp
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
- libboost_unit_test_framework \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.2
@@ -374,6 +420,7 @@ LOCAL_C_INCLUDES := \
$(OPENCL_HEADER_PATH) \
$(NN_HEADER_PATH) \
$(ARMNN_HEADER_PATH) \
+ $(ARMNN_PROFILING_PATH) \
$(ARMNN_THIRD_PARTY_PATH) \
$(ARMNN_UTILS_HEADER_PATH) \
$(ARMNN_DRIVER_HEADER_PATH)
@@ -384,9 +431,24 @@ LOCAL_CFLAGS := \
-Werror \
-O0 \
-UNDEBUG \
- -DBOOST_NO_AUTO_PTR \
-DARMNN_ANDROID_NN_V1_3
+# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTECL_ENABLED
+endif # ARMNN_COMPUTE_CL_ENABLED == 1
+# The variable to enable/disable the NEON backend (ARMNN_COMPUTE_NEON_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMCOMPUTENEON_ENABLED
+endif # ARMNN_COMPUTE_NEON_ENABLED == 1
+# The variable to enable/disable the REFERENCE backend (ARMNN_REF_ENABLED) is declared in android-nn-driver/Android.mk
+ifeq ($(ARMNN_REF_ENABLED),1)
+LOCAL_CFLAGS += \
+ -DARMNNREF_ENABLED
+endif # ARMNN_REF_ENABLED == 1
+
ifeq ($(ANDROID_R),1)
LOCAL_CFLAGS+= \
-DARMNN_ANDROID_R
@@ -404,9 +466,11 @@ LOCAL_SRC_FILES := \
1.1/Transpose.cpp \
1.2/Dilation.cpp \
1.2/Capabilities.cpp \
+ 1.2/Mean.cpp \
1.0/Lstm.cpp \
1.1/Lstm.cpp \
1.2/Lstm.cpp \
+ 1.2/UnidirectionalSequenceLstm.cpp \
1.3/QLstm.cpp \
1.3/QosTests.cpp \
Tests.cpp \
@@ -417,13 +481,14 @@ LOCAL_SRC_FILES := \
DriverTestHelpers.cpp \
SystemProperties.cpp \
Concat.cpp \
- TestTensor.cpp
+ TestTensor.cpp \
+ TestHalfTensor.cpp
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
- libboost_unit_test_framework \
libflatbuffers-framework \
- arm_compute_library
+ arm_compute_library \
+ $(ARMNN_BACKEND_STATIC_LIBRARIES)
LOCAL_WHOLE_STATIC_LIBRARIES := \
libarmnn-driver@1.3
diff --git a/test/Concat.cpp b/test/Concat.cpp
index 54ee8a23..fc4a56cf 100644
--- a/test/Concat.cpp
+++ b/test/Concat.cpp
@@ -1,21 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "DriverTestHelpers.hpp"
#include "TestTensor.hpp"
-#include "../1.0/HalPolicy.hpp"
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
-
#include <array>
#include <log/log.h>
-
-BOOST_AUTO_TEST_SUITE(ConcatTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -26,12 +19,6 @@ using RequestArgument = V1_0::RequestArgument;
namespace
{
-#ifndef ARMCOMPUTECL_ENABLED
- static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
- static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
void
ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
int32_t concatAxis,
@@ -61,19 +48,19 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
model.operations[0].outputs = hidl_vec<uint32_t>{static_cast<uint32_t>(inputs.size()+1)};
// make the prepared model
- V1_0::ErrorStatus prepareStatus=V1_0::ErrorStatus::NONE;
+ V1_0::ErrorStatus prepareStatus = V1_0::ErrorStatus::NONE;
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModelWithStatus(model,
*driver,
prepareStatus,
expectedPrepareStatus);
- BOOST_TEST(prepareStatus == expectedPrepareStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedPrepareStatus);
if (prepareStatus != V1_0::ErrorStatus::NONE)
{
// prepare failed, we cannot continue
return;
}
- BOOST_TEST(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
if (preparedModel.get() == nullptr)
{
// don't spoil other tests if prepare failed
@@ -130,9 +117,9 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
// run the execution
- ARMNN_ASSERT(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
auto execStatus = Execute(preparedModel, request, expectedExecStatus);
- BOOST_TEST(execStatus == expectedExecStatus);
+ DOCTEST_CHECK((int)execStatus == (int)expectedExecStatus);
if (execStatus == V1_0::ErrorStatus::NONE)
{
@@ -140,359 +127,607 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
const float * expectedOutput = expectedOutputTensor.GetData();
for (unsigned int i=0; i<expectedOutputTensor.GetNumElements();++i)
{
- BOOST_TEST(outdata[i] == expectedOutput[i]);
+ DOCTEST_CHECK(outdata[i] == expectedOutput[i]);
}
}
}
-} // namespace <anonymous>
-
-
-BOOST_DATA_TEST_CASE(SimpleConcatAxis0, COMPUTE_DEVICES)
+/// Test cases...
+void SimpleConcatAxis0(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{3,1,1,1},{0,1,2}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{3, 1, 1, 1}, {0, 1, 2}};
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(ConcatAxis0_NoInterleave, COMPUTE_DEVICES)
+void ConcatAxis0NoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{2,1,2,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{3,1,2,1},{4, 5,
- 6, 7,
- 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10, 11}};
-
- TestTensor expected{armnn::TensorShape{6,1,2,1},{0, 1,
- 2, 3,
- 4, 5,
- 6, 7,
- 8, 9,
- 10, 11}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor aIn{armnn::TensorShape{2, 1, 2, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{3, 1, 2, 1}, {4, 5,
+ 6, 7,
+ 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10, 11}};
+
+ TestTensor expected{armnn::TensorShape{6, 1, 2, 1}, {0, 1,
+ 2, 3,
+ 4, 5,
+ 6, 7,
+ 8, 9,
+ 10, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis1, COMPUTE_DEVICES)
+void SimpleConcatAxis1(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,3,1,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 3, 1, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(ConcatAxis1_NoInterleave, COMPUTE_DEVICES)
+void ConcatAxis1NoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,3,2,1},{4, 5,
- 6, 7,
- 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10, 11}};
-
- TestTensor expected{armnn::TensorShape{1,6,2,1},{0, 1,
- 2, 3,
- 4, 5,
- 6, 7,
- 8, 9,
- 10, 11}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor aIn{armnn::TensorShape{1, 2, 2, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 3, 2, 1}, {4, 5,
+ 6, 7,
+ 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10, 11}};
+
+ TestTensor expected{armnn::TensorShape{1, 6, 2, 1}, {0, 1,
+ 2, 3,
+ 4, 5,
+ 6, 7,
+ 8, 9,
+ 10, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis1_DoInterleave, COMPUTE_DEVICES)
+void SimpleConcatAxis1DoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{2,2,1,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{2,3,1,1},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{2,1,1,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{2,6,1,1},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{2, 2, 1, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{2, 3, 1, 1}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{2, 1, 1, 1}, {10,
+ 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{2, 6, 1, 1}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis2, COMPUTE_DEVICES)
+void SimpleConcatAxis2(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,1,3,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 1, 3, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(ConcatAxis2_NoInterleave, COMPUTE_DEVICES)
+void ConcatAxis2NoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,3,2},{4, 5,
- 6, 7,
- 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,1,2},{10, 11}};
-
- TestTensor expected{armnn::TensorShape{1,1,6,2},{0, 1,
- 2, 3,
- 4, 5,
- 6, 7,
- 8, 9,
- 10, 11}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 3, 2}, {4, 5,
+ 6, 7,
+ 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 2}, {10, 11}};
+
+ TestTensor expected{armnn::TensorShape{1, 1, 6, 2}, {0, 1,
+ 2, 3,
+ 4, 5,
+ 6, 7,
+ 8, 9,
+ 10, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis2_DoInterleave, COMPUTE_DEVICES)
+void SimpleConcatAxis2DoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,2,3,1},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,2,1,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{1,2,6,1},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 2, 2, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 2, 3, 1}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 2, 1, 1}, {10,
+ 11}};
+
+ TestTensor expected{armnn::TensorShape{1, 2, 6, 1}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis3, COMPUTE_DEVICES)
+void SimpleConcatAxis3(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,1,1,3},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 1, 1, 3}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis3_DoInterleave, COMPUTE_DEVICES)
+void SimpleConcatAxis3DoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{1, 1, 2, 6}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(AxisTooBig, COMPUTE_DEVICES)
+void AxisTooBig(armnn::Compute computeDevice)
{
int32_t axis = 4;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
// The axis must be within the range of [-rank(values), rank(values))
// see: https://www.tensorflow.org/api_docs/python/tf/concat
- TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor uncheckedOutput{armnn::TensorShape{1, 1, 1, 1}, {0}};
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(AxisTooSmall, COMPUTE_DEVICES)
+void AxisTooSmall(armnn::Compute computeDevice)
{
int32_t axis = -5;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
// The axis must be within the range of [-rank(values), rank(values))
// see: https://www.tensorflow.org/api_docs/python/tf/concat
- TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor uncheckedOutput{armnn::TensorShape{1, 1, 1, 1}, {0}};
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(TooFewInputs, COMPUTE_DEVICES)
+void TooFewInputs(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
// We need at least two tensors to concatenate
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn}, axis, aIn, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn}, axis, aIn, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedInputDimensions, COMPUTE_DEVICES)
+void MismatchedInputDimensions(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor mismatched{armnn::TensorShape{1,1,1,1},{10}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor mismatched{armnn::TensorShape{1, 1, 1, 1}, {10}};
- TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor expected{armnn::TensorShape{1, 1, 2, 6}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
// The input dimensions must be compatible
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn, &mismatched}, axis, expected, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn, &mismatched}, axis, expected, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedInputRanks, COMPUTE_DEVICES)
+void MismatchedInputRanks(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,2},{0,1}};
- TestTensor bIn{armnn::TensorShape{1,1},{4}};
- TestTensor expected{armnn::TensorShape{1,1,3},{0,1,4}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2}, {0, 1}};
+ TestTensor bIn{armnn::TensorShape{1, 1}, {4}};
+ TestTensor expected{armnn::TensorShape{1, 1, 3}, {0, 1, 4}};
// The input dimensions must be compatible
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn}, axis, expected, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn}, axis, expected, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedOutputDimensions, COMPUTE_DEVICES)
+void MismatchedOutputDimensions(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
- TestTensor mismatched{armnn::TensorShape{1,1,6,2},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor mismatched{armnn::TensorShape{1, 1, 6, 2}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
// The input and output dimensions must be compatible
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedOutputRank, COMPUTE_DEVICES)
+void MismatchedOutputRank(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
-
- TestTensor mismatched{armnn::TensorShape{6,2},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
+
+ TestTensor mismatched{armnn::TensorShape{6, 2}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
// The input and output ranks must match
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(ValidNegativeAxis, COMPUTE_DEVICES)
+void ValidNegativeAxis(armnn::Compute computeDevice)
{
// this is the same as 3
// see: https://www.tensorflow.org/api_docs/python/tf/concat
int32_t axis = -1;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{1, 1, 2, 6}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisZero3D, COMPUTE_DEVICES)
+void SimpleConcatAxisZero3D(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{3,1,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{3, 1, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisOne3D, COMPUTE_DEVICES)
+void SimpleConcatAxisOne3D(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,3,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 3, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisTwo3D, COMPUTE_DEVICES)
+void SimpleConcatAxisTwo3D(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,1,3},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 1, 3}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisZero2D, COMPUTE_DEVICES)
+void SimpleConcatAxisZero2D(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{3,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{3, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisOne2D, COMPUTE_DEVICES)
+void SimpleConcatAxisOne2D(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,3},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 3}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisZero1D, COMPUTE_DEVICES)
+void SimpleConcatAxisZero1D(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1},{0}};
- TestTensor bIn{armnn::TensorShape{1},{1}};
- TestTensor cIn{armnn::TensorShape{1},{2}};
+ TestTensor aIn{armnn::TensorShape{1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1}, {2}};
+
+ TestTensor expected{armnn::TensorShape{3}, {0, 1, 2}};
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
+}
+
+} // namespace <anonymous>
+
+DOCTEST_TEST_SUITE("ConcatTests_CpuRef")
+{
+
+DOCTEST_TEST_CASE("SimpleConcatAxis0")
+{
+ SimpleConcatAxis0(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis0NoInterleave")
+{
+ ConcatAxis0NoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1")
+{
+ SimpleConcatAxis1(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis1NoInterleave")
+{
+ ConcatAxis1NoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1DoInterleave")
+{
+ SimpleConcatAxis1DoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2")
+{
+ SimpleConcatAxis2(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis2NoInterleave")
+{
+ ConcatAxis2NoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2DoInterleave")
+{
+ SimpleConcatAxis2DoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3")
+{
+ SimpleConcatAxis3(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3DoInterleave")
+{
+ SimpleConcatAxis3DoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("AxisTooBig")
+{
+ AxisTooBig(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("AxisTooSmall")
+{
+ AxisTooSmall(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("TooFewInputs")
+{
+ TooFewInputs(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputDimensions")
+{
+ MismatchedInputDimensions(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputRanks")
+{
+ MismatchedInputRanks(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputDimensions")
+{
+ MismatchedOutputDimensions(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputRank")
+{
+ MismatchedOutputRank(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ValidNegativeAxis")
+{
+ ValidNegativeAxis(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero3D")
+{
+ SimpleConcatAxisZero3D(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisOne3D")
+{
+ SimpleConcatAxisOne3D(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisTwo3D")
+{
+ SimpleConcatAxisTwo3D(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero2D")
+{
+ SimpleConcatAxisZero2D(armnn::Compute::CpuRef);
+}
- TestTensor expected{armnn::TensorShape{3},{0,1,2}};
+DOCTEST_TEST_CASE("SimpleConcatAxisOne2D")
+{
+ SimpleConcatAxisOne2D(armnn::Compute::CpuRef);
+}
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+DOCTEST_TEST_CASE("SimpleConcatAxisZero1D")
+{
+ SimpleConcatAxisZero1D(armnn::Compute::CpuRef);
+}
+
+}
+
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("ConcatTests_GpuAcc")
+{
+
+DOCTEST_TEST_CASE("SimpleConcatAxis0")
+{
+ SimpleConcatAxis0(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis0NoInterleave")
+{
+ ConcatAxis0NoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1")
+{
+ SimpleConcatAxis1(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis1NoInterleave")
+{
+ ConcatAxis1NoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1DoInterleave")
+{
+ SimpleConcatAxis1DoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2")
+{
+ SimpleConcatAxis2(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis2NoInterleave")
+{
+ ConcatAxis2NoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2DoInterleave")
+{
+ SimpleConcatAxis2DoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3")
+{
+ SimpleConcatAxis3(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3DoInterleave")
+{
+ SimpleConcatAxis3DoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("AxisTooBig")
+{
+ AxisTooBig(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("AxisTooSmall")
+{
+ AxisTooSmall(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("TooFewInputs")
+{
+ TooFewInputs(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputDimensions")
+{
+ MismatchedInputDimensions(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputRanks")
+{
+ MismatchedInputRanks(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputDimensions")
+{
+ MismatchedOutputDimensions(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputRank")
+{
+ MismatchedOutputRank(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ValidNegativeAxis")
+{
+ ValidNegativeAxis(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero3D")
+{
+ SimpleConcatAxisZero3D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisOne3D")
+{
+ SimpleConcatAxisOne3D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisTwo3D")
+{
+ SimpleConcatAxisTwo3D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero2D")
+{
+ SimpleConcatAxisZero2D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisOne2D")
+{
+ SimpleConcatAxisOne2D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero1D")
+{
+ SimpleConcatAxisZero1D(armnn::Compute::GpuAcc);
}
-BOOST_AUTO_TEST_SUITE_END()
+}// End of GpuAcc Test Suite
+#endif
\ No newline at end of file
diff --git a/test/Concurrent.cpp b/test/Concurrent.cpp
index 50ba0e9f..71119cde 100644
--- a/test/Concurrent.cpp
+++ b/test/Concurrent.cpp
@@ -1,17 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "DriverTestHelpers.hpp"
-
-#include "../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
+#include "DriverTestHelpers.hpp"
#include <log/log.h>
-BOOST_AUTO_TEST_SUITE(ConcurrentDriverTests)
-
+DOCTEST_TEST_SUITE("ConcurrentDriverTests")
+{
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
using HalPolicy = armnn_driver::hal_1_0::HalPolicy;
@@ -26,7 +23,7 @@ using namespace armnn_driver;
// The main point of this test is to check that multiple requests can be
// executed without waiting for the callback from previous execution.
// The operations performed are not significant.
-BOOST_AUTO_TEST_CASE(ConcurrentExecute)
+DOCTEST_TEST_CASE("ConcurrentExecute")
{
ALOGI("ConcurrentExecute: entry");
@@ -64,7 +61,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
}
}
- BOOST_TEST(maxRequests == preparedModelsSize);
+ DOCTEST_CHECK(maxRequests == preparedModelsSize);
// construct the request data
V1_0::DataLocation inloc = {};
@@ -85,15 +82,16 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
// build the requests
V1_0::Request requests[maxRequests];
+ android::sp<IMemory> inMemory[maxRequests];
android::sp<IMemory> outMemory[maxRequests];
+ float indata[] = {2, 32, 16};
float* outdata[maxRequests];
for (size_t i = 0; i < maxRequests; ++i)
{
requests[i].inputs = hidl_vec<RequestArgument>{input};
requests[i].outputs = hidl_vec<RequestArgument>{output};
// set the input data (matching source test)
- float indata[] = {2, 32, 16};
- AddPoolAndSetData<float>(3, requests[i], indata);
+ inMemory[i] = AddPoolAndSetData<float>(3, requests[i], indata);
// add memory for the output
outMemory[i] = AddPoolAndGetData<float>(1, requests[i]);
outdata[i] = static_cast<float*>(static_cast<void*>(outMemory[i]->getPointer()));
@@ -111,7 +109,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
ALOGI("ConcurrentExecute: waiting for callbacks");
for (size_t i = 0; i < maxRequests; ++i)
{
- ARMNN_ASSERT(cb[i]);
+ DOCTEST_CHECK(cb[i]);
cb[i]->wait();
}
@@ -119,9 +117,9 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
ALOGI("ConcurrentExecute: validating results");
for (size_t i = 0; i < maxRequests; ++i)
{
- BOOST_TEST(outdata[i][0] == 152);
+ DOCTEST_CHECK(outdata[i][0] == 152);
}
ALOGI("ConcurrentExecute: exit");
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/Convolution2D.hpp b/test/Convolution2D.hpp
index c3f9d48c..cc26f68f 100644
--- a/test/Convolution2D.hpp
+++ b/test/Convolution2D.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -7,13 +7,10 @@
#include "DriverTestHelpers.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
-BOOST_AUTO_TEST_SUITE(Convolution2DTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -25,11 +22,11 @@ namespace driverTestHelpers
#define ARMNN_ANDROID_FP16_TEST(result, fp16Expectation, fp32Expectation, fp16Enabled) \
if (fp16Enabled) \
{ \
- BOOST_TEST((result == fp16Expectation || result == fp32Expectation), result << \
+ DOCTEST_CHECK_MESSAGE((result == fp16Expectation || result == fp32Expectation), result << \
" does not match either " << fp16Expectation << "[fp16] or " << fp32Expectation << "[fp32]"); \
} else \
{ \
- BOOST_TEST(result == fp32Expectation); \
+ DOCTEST_CHECK(result == fp32Expectation); \
}
void SetModelFp16Flag(V1_0::Model& model, bool fp16Enabled);
@@ -55,22 +52,22 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme, bool fp16Enabled
// add operands
float weightValue[] = {1.f, -1.f, 0.f, 1.f};
- float biasValue[] = {0.f};
+ float biasValue[] = {0.f};
- AddInputOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 2, 3, 1});
- AddTensorOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 2, 2, 1}, weightValue);
- AddTensorOperand<HalPolicy>(model, hidl_vec<uint32_t>{1}, biasValue);
- AddIntOperand<HalPolicy>(model, (int32_t)paddingScheme); // padding
+ AddInputOperand<HalPolicy>(model, hidl_vec < uint32_t > {1, 2, 3, 1});
+ AddTensorOperand<HalPolicy>(model, hidl_vec < uint32_t > {1, 2, 2, 1}, weightValue);
+ AddTensorOperand<HalPolicy>(model, hidl_vec < uint32_t > {1}, biasValue);
+ AddIntOperand<HalPolicy>(model, (int32_t) paddingScheme); // padding
AddIntOperand<HalPolicy>(model, 2); // stride x
AddIntOperand<HalPolicy>(model, 2); // stride y
AddIntOperand<HalPolicy>(model, 0); // no activation
- AddOutputOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 1, outSize, 1});
+ AddOutputOperand<HalPolicy>(model, hidl_vec < uint32_t > {1, 1, outSize, 1});
// make the convolution operation
model.operations.resize(1);
model.operations[0].type = HalOperationType::CONV_2D;
- model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2, 3, 4, 5, 6};
- model.operations[0].outputs = hidl_vec<uint32_t>{7};
+ model.operations[0].inputs = hidl_vec < uint32_t > {0, 1, 2, 3, 4, 5, 6};
+ model.operations[0].outputs = hidl_vec < uint32_t > {7};
// make the prepared model
SetModelFp16Flag(model, fp16Enabled);
@@ -78,24 +75,24 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme, bool fp16Enabled
// construct the request
V1_0::DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = 6 * sizeof(float);
- RequestArgument input = {};
- input.location = inloc;
- input.dimensions = hidl_vec<uint32_t>{};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 6 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec < uint32_t > {};
V1_0::DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = outSize * sizeof(float);
- RequestArgument output = {};
- output.location = outloc;
- output.dimensions = hidl_vec<uint32_t>{};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = outSize * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec < uint32_t > {};
V1_0::Request request = {};
- request.inputs = hidl_vec<RequestArgument>{input};
- request.outputs = hidl_vec<RequestArgument>{output};
+ request.inputs = hidl_vec < RequestArgument > {input};
+ request.outputs = hidl_vec < RequestArgument > {output};
// set the input data (matching source test)
float indata[] = {1024.25f, 1.f, 0.f, 3.f, -1, -1024.25f};
@@ -114,19 +111,17 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme, bool fp16Enabled
// check the result
switch (paddingScheme)
{
- case android::nn::kPaddingValid:
- ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
- break;
- case android::nn::kPaddingSame:
- ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
- BOOST_TEST(outdata[1] == 0.f);
- break;
- default:
- BOOST_TEST(false);
- break;
+ case android::nn::kPaddingValid:
+ ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
+ break;
+ case android::nn::kPaddingSame:
+ ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
+ DOCTEST_CHECK(outdata[1] == 0.f);
+ break;
+ default:
+ DOCTEST_CHECK(false);
+ break;
}
}
} // namespace driverTestHelpers
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/Dilation.hpp b/test/Dilation.hpp
index d0189c96..dbd24933 100644
--- a/test/Dilation.hpp
+++ b/test/Dilation.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -7,17 +7,12 @@
#include "DriverTestHelpers.hpp"
-#include <armnn/LayerVisitorBase.hpp>
+#include <armnn/StrategyBase.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
-#include <boost/test/unit_test.hpp>
-
#include <numeric>
-BOOST_AUTO_TEST_SUITE(DilationTests)
-
using namespace armnn;
-using namespace boost;
using namespace driverTestHelpers;
struct DilationTestOptions
@@ -35,7 +30,7 @@ struct DilationTestOptions
bool m_HasDilation;
};
-class DilationTestVisitor : public LayerVisitorBase<VisitorThrowingPolicy>
+class DilationTestVisitor : public StrategyBase<ThrowingStrategy>
{
public:
DilationTestVisitor() :
@@ -47,32 +42,32 @@ public:
m_ExpectedDilationY{expectedDilationY}
{}
- void VisitConvolution2dLayer(const IConnectableLayer *layer,
- const Convolution2dDescriptor& descriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
- const char *name = nullptr) override
- {
- IgnoreUnused(layer);
- IgnoreUnused(weights);
- IgnoreUnused(biases);
- IgnoreUnused(name);
-
- CheckDilationParams(descriptor);
- }
-
- void VisitDepthwiseConvolution2dLayer(const IConnectableLayer *layer,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
- const char *name = nullptr) override
+ void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+ const armnn::BaseDescriptor& descriptor,
+ const std::vector<armnn::ConstTensor>& constants,
+ const char* name,
+ const armnn::LayerBindingId id = 0) override
{
- IgnoreUnused(layer);
- IgnoreUnused(weights);
- IgnoreUnused(biases);
- IgnoreUnused(name);
-
- CheckDilationParams(descriptor);
+ armnn::IgnoreUnused(layer, constants, id, name);
+ switch (layer->GetType())
+ {
+ case armnn::LayerType::Constant:
+ break;
+ case armnn::LayerType::Convolution2d:
+ {
+ CheckDilationParams(static_cast<const armnn::Convolution2dDescriptor&>(descriptor));
+ break;
+ }
+ case armnn::LayerType::DepthwiseConvolution2d:
+ {
+ CheckDilationParams(static_cast<const armnn::DepthwiseConvolution2dDescriptor&>(descriptor));
+ break;
+ }
+ default:
+ {
+ m_DefaultStrategy.Apply(GetLayerTypeAsCString(layer->GetType()));
+ }
+ }
}
private:
@@ -82,8 +77,8 @@ private:
template<typename ConvolutionDescriptor>
void CheckDilationParams(const ConvolutionDescriptor& descriptor)
{
- BOOST_CHECK_EQUAL(descriptor.m_DilationX, m_ExpectedDilationX);
- BOOST_CHECK_EQUAL(descriptor.m_DilationY, m_ExpectedDilationY);
+ CHECK_EQ(descriptor.m_DilationX, m_ExpectedDilationX);
+ CHECK_EQ(descriptor.m_DilationY, m_ExpectedDilationY);
}
};
@@ -169,11 +164,9 @@ void DilationTestImpl(const DilationTestOptions& options)
data.m_OutputSlotForOperand = std::vector<IOutputSlot*>(model.operands.size(), nullptr);
bool ok = HalPolicy::ConvertOperation(model.operations[0], model, data);
- BOOST_CHECK(ok);
+ DOCTEST_CHECK(ok);
// check if dilation params are as expected
DilationTestVisitor visitor = options.m_HasDilation ? DilationTestVisitor(2, 2) : DilationTestVisitor();
- data.m_Network->Accept(visitor);
+ data.m_Network->ExecuteStrategy(visitor);
}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/DriverTestHelpers.cpp b/test/DriverTestHelpers.cpp
index 8e8d7bef..1f9fc1ee 100644
--- a/test/DriverTestHelpers.cpp
+++ b/test/DriverTestHelpers.cpp
@@ -1,10 +1,10 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "DriverTestHelpers.hpp"
#include <log/log.h>
-#include <boost/test/unit_test.hpp>
namespace android
{
@@ -139,10 +139,10 @@ android::sp<V1_0::IPreparedModel> PrepareModelWithStatus(const V1_0::Model& mode
driver.prepareModel(model, cb);
prepareStatus = cb->GetErrorStatus();
- BOOST_TEST(prepareStatus == expectedStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedStatus);
if (expectedStatus == V1_0::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel() != nullptr));
}
return cb->GetPreparedModel();
}
@@ -158,10 +158,10 @@ android::sp<V1_0::IPreparedModel> PrepareModelWithStatus(const V1_1::Model& mode
driver.prepareModel_1_1(model, V1_1::ExecutionPreference::LOW_POWER, cb);
prepareStatus = cb->GetErrorStatus();
- BOOST_TEST(prepareStatus == expectedStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedStatus);
if (expectedStatus == V1_0::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel() != nullptr));
}
return cb->GetPreparedModel();
}
@@ -184,10 +184,10 @@ android::sp<V1_2::IPreparedModel> PrepareModelWithStatus_1_2(const armnn_driver:
driver.prepareModel_1_2(model, V1_1::ExecutionPreference::LOW_POWER, emptyHandle1, emptyHandle2, emptyToken, cb);
prepareStatus = cb->GetErrorStatus();
- BOOST_TEST(prepareStatus == expectedStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedStatus);
if (expectedStatus == V1_0::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel_1_2() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel_1_2() != nullptr));
}
return cb->GetPreparedModel_1_2();
}
@@ -219,7 +219,7 @@ android::sp<V1_3::IPreparedModel> PrepareModelWithStatus_1_3(const armnn_driver:
prepareStatus = cb->Get_1_3_ErrorStatus();
if (prepareStatus == V1_3::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel_1_3() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel_1_3() != nullptr));
}
return cb->GetPreparedModel_1_3();
}
@@ -230,10 +230,10 @@ V1_0::ErrorStatus Execute(android::sp<V1_0::IPreparedModel> preparedModel,
const V1_0::Request& request,
V1_0::ErrorStatus expectedStatus)
{
- BOOST_TEST(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
android::sp<ExecutionCallback> cb(new ExecutionCallback());
V1_0::ErrorStatus execStatus = preparedModel->execute(request, cb);
- BOOST_TEST(execStatus == expectedStatus);
+ DOCTEST_CHECK((int)execStatus == (int)expectedStatus);
ALOGI("Execute: waiting for callback to be invoked");
cb->wait();
return execStatus;
@@ -242,9 +242,10 @@ V1_0::ErrorStatus Execute(android::sp<V1_0::IPreparedModel> preparedModel,
android::sp<ExecutionCallback> ExecuteNoWait(android::sp<V1_0::IPreparedModel> preparedModel,
const V1_0::Request& request)
{
- BOOST_TEST(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
android::sp<ExecutionCallback> cb(new ExecutionCallback());
- BOOST_TEST(preparedModel->execute(request, cb) == V1_0::ErrorStatus::NONE);
+ V1_0::ErrorStatus execStatus = preparedModel->execute(request, cb);
+ DOCTEST_CHECK((int)execStatus == (int)V1_0::ErrorStatus::NONE);
ALOGI("ExecuteNoWait: returning callback object");
return cb;
}
diff --git a/test/DriverTestHelpers.hpp b/test/DriverTestHelpers.hpp
index d37fbf26..98be0903 100644
--- a/test/DriverTestHelpers.hpp
+++ b/test/DriverTestHelpers.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -10,10 +10,16 @@
#include "../ArmnnDriver.hpp"
#include <iosfwd>
-#include <boost/test/unit_test.hpp>
-
#include <android/hidl/allocator/1.0/IAllocator.h>
+// Some of the short name macros from 'third-party/doctest/doctest.h' clash with macros in
+// 'system/core/base/include/android-base/logging.h' so we use the full DOCTEST macro names
+#ifndef DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
+#define DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
+#endif // DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
+
+#include <doctest/doctest.h>
+
using RequestArgument = V1_0::RequestArgument;
using ::android::hidl::allocator::V1_0::IAllocator;
@@ -167,7 +173,7 @@ android::sp<IMemory> AddPoolAndGetData(uint32_t size, V1_0::Request& request)
android::sp<IAllocator> allocator = IAllocator::getService("ashmem");
allocator->allocate(sizeof(T) * size, [&](bool success, const hidl_memory& mem) {
- BOOST_TEST(success);
+ DOCTEST_CHECK(success);
pool = mem;
});
@@ -180,13 +186,15 @@ android::sp<IMemory> AddPoolAndGetData(uint32_t size, V1_0::Request& request)
}
template<typename T>
-void AddPoolAndSetData(uint32_t size, V1_0::Request& request, const T* data)
+android::sp<IMemory> AddPoolAndSetData(uint32_t size, V1_0::Request& request, const T* data)
{
android::sp<IMemory> memory = AddPoolAndGetData<T>(size, request);
T* dst = static_cast<T*>(static_cast<void*>(memory->getPointer()));
memcpy(dst, data, size * sizeof(T));
+
+ return memory;
}
template<typename HalPolicy,
diff --git a/test/FullyConnected.cpp b/test/FullyConnected.cpp
index a68a5870..4717357b 100644
--- a/test/FullyConnected.cpp
+++ b/test/FullyConnected.cpp
@@ -1,17 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "DriverTestHelpers.hpp"
-
-#include "../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
+#include "DriverTestHelpers.hpp"
#include <log/log.h>
-BOOST_AUTO_TEST_SUITE(FullyConnectedTests)
-
+DOCTEST_TEST_SUITE("FullyConnectedTests")
+{
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -19,7 +16,7 @@ using namespace armnn_driver;
using HalPolicy = hal_1_0::HalPolicy;
// Add our own test here since we fail the fc tests which Google supplies (because of non-const weights)
-BOOST_AUTO_TEST_CASE(FullyConnected)
+DOCTEST_TEST_CASE("FullyConnected")
{
// this should ideally replicate fully_connected_float.model.cpp
// but that uses slightly weird dimensions which I don't think we need to support for now
@@ -83,10 +80,10 @@ BOOST_AUTO_TEST_CASE(FullyConnected)
}
// check the result
- BOOST_TEST(outdata[0] == 152);
+ DOCTEST_CHECK(outdata[0] == 152);
}
-BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
+DOCTEST_TEST_CASE("TestFullyConnected4dInput")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -165,17 +162,17 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
}
// check the result
- BOOST_TEST(outdata[0] == 1);
- BOOST_TEST(outdata[1] == 2);
- BOOST_TEST(outdata[2] == 3);
- BOOST_TEST(outdata[3] == 4);
- BOOST_TEST(outdata[4] == 5);
- BOOST_TEST(outdata[5] == 6);
- BOOST_TEST(outdata[6] == 7);
- BOOST_TEST(outdata[7] == 8);
+ DOCTEST_CHECK(outdata[0] == 1);
+ DOCTEST_CHECK(outdata[1] == 2);
+ DOCTEST_CHECK(outdata[2] == 3);
+ DOCTEST_CHECK(outdata[3] == 4);
+ DOCTEST_CHECK(outdata[4] == 5);
+ DOCTEST_CHECK(outdata[5] == 6);
+ DOCTEST_CHECK(outdata[6] == 7);
+ DOCTEST_CHECK(outdata[7] == 8);
}
-BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
+DOCTEST_TEST_CASE("TestFullyConnected4dInputReshape")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -254,17 +251,17 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
}
// check the result
- BOOST_TEST(outdata[0] == 1);
- BOOST_TEST(outdata[1] == 2);
- BOOST_TEST(outdata[2] == 3);
- BOOST_TEST(outdata[3] == 4);
- BOOST_TEST(outdata[4] == 5);
- BOOST_TEST(outdata[5] == 6);
- BOOST_TEST(outdata[6] == 7);
- BOOST_TEST(outdata[7] == 8);
+ DOCTEST_CHECK(outdata[0] == 1);
+ DOCTEST_CHECK(outdata[1] == 2);
+ DOCTEST_CHECK(outdata[2] == 3);
+ DOCTEST_CHECK(outdata[3] == 4);
+ DOCTEST_CHECK(outdata[4] == 5);
+ DOCTEST_CHECK(outdata[5] == 6);
+ DOCTEST_CHECK(outdata[6] == 7);
+ DOCTEST_CHECK(outdata[7] == 8);
}
-BOOST_AUTO_TEST_CASE(TestFullyConnectedWeightsAsInput)
+DOCTEST_TEST_CASE("TestFullyConnectedWeightsAsInput")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -366,14 +363,14 @@ BOOST_AUTO_TEST_CASE(TestFullyConnectedWeightsAsInput)
}
// check the result
- BOOST_TEST(outdata[0] == 1);
- BOOST_TEST(outdata[1] == 2);
- BOOST_TEST(outdata[2] == 3);
- BOOST_TEST(outdata[3] == 4);
- BOOST_TEST(outdata[4] == 5);
- BOOST_TEST(outdata[5] == 6);
- BOOST_TEST(outdata[6] == 7);
- BOOST_TEST(outdata[7] == 8);
+ DOCTEST_CHECK(outdata[0] == 1);
+ DOCTEST_CHECK(outdata[1] == 2);
+ DOCTEST_CHECK(outdata[2] == 3);
+ DOCTEST_CHECK(outdata[3] == 4);
+ DOCTEST_CHECK(outdata[4] == 5);
+ DOCTEST_CHECK(outdata[5] == 6);
+ DOCTEST_CHECK(outdata[6] == 7);
+ DOCTEST_CHECK(outdata[7] == 8);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
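
Every converted file follows the same structural pattern seen above: the Boost suite and its matching BOOST_AUTO_TEST_SUITE_END() become a DOCTEST_TEST_SUITE block whose test cases sit inside an explicit pair of braces. A stripped-down skeleton of that shape, with placeholder names, assuming doctest's short macro names are disabled as in DriverTestHelpers.hpp:

// Skeleton of the suite layout used throughout these tests. Names are placeholders.
#ifndef DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
#define DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
#endif
#include <doctest/doctest.h>

DOCTEST_TEST_SUITE("ExampleSuite")
{

DOCTEST_TEST_CASE("ExampleCase")
{
    DOCTEST_CHECK(1 + 1 == 2);
}

}   // end of suite "ExampleSuite"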
diff --git a/test/GenericLayerTests.cpp b/test/GenericLayerTests.cpp
index 188c7b1c..bd86a885 100644
--- a/test/GenericLayerTests.cpp
+++ b/test/GenericLayerTests.cpp
@@ -1,16 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "DriverTestHelpers.hpp"
-
-#include "../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
+#include "DriverTestHelpers.hpp"
#include <log/log.h>
-BOOST_AUTO_TEST_SUITE(GenericLayerTests)
+DOCTEST_TEST_SUITE("GenericLayerTests")
+{
using namespace android::hardware;
using namespace driverTestHelpers;
@@ -18,7 +16,7 @@ using namespace armnn_driver;
using HalPolicy = hal_1_0::HalPolicy;
-BOOST_AUTO_TEST_CASE(GetSupportedOperations)
+DOCTEST_TEST_CASE("GetSupportedOperations")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -52,9 +50,9 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model0.operations[0].outputs = hidl_vec<uint32_t>{4};
driver->getSupportedOperations(model0, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)1);
- BOOST_TEST(supported[0] == true);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)1);
+ DOCTEST_CHECK(supported[0] == true);
V1_0::Model model1 = {};
@@ -81,8 +79,8 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
driver->getSupportedOperations(model1, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
- BOOST_TEST(supported.empty());
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
+ DOCTEST_CHECK(supported.empty());
// Test Broadcast on add/mul operators
HalPolicy::Model model2 = {};
@@ -114,10 +112,10 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model2.operations[1].outputs = hidl_vec<uint32_t>{4};
driver->getSupportedOperations(model2, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)2);
- BOOST_TEST(supported[0] == true);
- BOOST_TEST(supported[1] == true);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)2);
+ DOCTEST_CHECK(supported[0] == true);
+ DOCTEST_CHECK(supported[1] == true);
V1_0::Model model3 = {};
@@ -143,9 +141,9 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model3.operations[0].outputs = hidl_vec<uint32_t>{3, 4};
driver->getSupportedOperations(model3, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)1);
- BOOST_TEST(supported[0] == false);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)1);
+ DOCTEST_CHECK(supported[0] == false);
HalPolicy::Model model4 = {};
@@ -158,14 +156,14 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model4.operations[0].outputs = hidl_vec<uint32_t>{0};
driver->getSupportedOperations(model4, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
- BOOST_TEST(supported.empty());
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
+ DOCTEST_CHECK(supported.empty());
}
// The purpose of this test is to ensure that when encountering an unsupported operation
// it is skipped and getSupportedOperations() continues (rather than failing and stopping).
// As per IVGCVSW-710.
-BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
+DOCTEST_TEST_CASE("UnsupportedLayerContinueOnFailure")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -240,16 +238,16 @@ BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
// We are testing that the unsupported layers return false and the test continues rather than failing and stopping
driver->getSupportedOperations(model, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)3);
- BOOST_TEST(supported[0] == false);
- BOOST_TEST(supported[1] == true);
- BOOST_TEST(supported[2] == false);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)3);
+ DOCTEST_CHECK(supported[0] == false);
+ DOCTEST_CHECK(supported[1] == true);
+ DOCTEST_CHECK(supported[2] == false);
}
// The purpose of this test is to ensure that when encountering a failure
// during mem pool mapping we properly report an error to the framework via a callback
-BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
+DOCTEST_TEST_CASE("ModelToINetworkConverterMemPoolFail")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -268,8 +266,8 @@ BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
// Memory pool mapping should fail, we should report an error
driver->getSupportedOperations(model, cb);
- BOOST_TEST((int)errorStatus != (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.empty());
+ DOCTEST_CHECK((int)errorStatus != (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.empty());
}
-BOOST_AUTO_TEST_SUITE_END()
+}
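
The GetSupportedOperations tests above drive the driver through the V1_0 HAL callback, which reports an error status plus one boolean per operation in the model. A hedged sketch of a helper built on that callback shape, assuming the same V1_0 signature the tests rely on and intended to be called from inside a DOCTEST_TEST_CASE; the helper name is invented for illustration.

// Sketch: collecting getSupportedOperations() results through the V1_0 callback.
void CheckAllOperationsSupported(armnn_driver::ArmnnDriver& driver, const V1_0::Model& model)
{
    V1_0::ErrorStatus errorStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
    std::vector<bool> supported;

    auto cb = [&](V1_0::ErrorStatus status, const android::hardware::hidl_vec<bool>& supportedOps)
    {
        errorStatus = status;
        supported.assign(supportedOps.begin(), supportedOps.end());
    };

    driver.getSupportedOperations(model, cb);

    DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
    for (bool isSupported : supported)
    {
        DOCTEST_CHECK(isSupported);
    }
}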
diff --git a/test/Lstm.hpp b/test/Lstm.hpp
index 2cb3c264..93f2f32d 100644
--- a/test/Lstm.hpp
+++ b/test/Lstm.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -9,8 +9,6 @@
#include <armnn/utility/IgnoreUnused.hpp>
-#include <boost/math/special_functions/relative_difference.hpp>
-
#include <array>
using ArmnnDriver = armnn_driver::ArmnnDriver;
@@ -40,26 +38,6 @@ RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int
return inputRequestArgument;
}
-// Returns true if the relative difference between two float values is less than the tolerance value given.
-// This is used because the floating point comparison tolerance (set on each BOOST_AUTO_TEST_CASE) does not work!
-bool TolerantCompareEqual(float a, float b, float tolerance = 0.00001f)
-{
- float rd;
- if (a == 0.0f)
- {
- rd = fabs(b);
- }
- else if (b == 0.0f)
- {
- rd = fabs(a);
- }
- else
- {
- rd = boost::math::relative_difference(a, b);
- }
- return rd < tolerance;
-}
-
// Helper function to create an OperandLifeTime::NO_VALUE for testing.
// To be used on optional input operands that have no values - these are valid and should be tested.
V1_0::OperandLifeTime CreateNoValueLifeTime(const hidl_vec<uint32_t>& dimensions)
@@ -100,12 +78,6 @@ void ExecuteModel<armnn_driver::hal_1_2::HalPolicy::Model>(const armnn_driver::h
} // anonymous namespace
-#ifndef ARMCOMPUTECL_ENABLED
-static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
-static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
// Add our own tests here since we fail the lstm tests which Google supplies (because of non-const weights)
template <typename HalPolicy>
void LstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
@@ -394,18 +366,20 @@ void LstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
// check the results
for (size_t i = 0; i < outputStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputStateOutValue[i], outputStateOutData[i]),
- "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != " << outputStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(outputStateOutValue[i] == doctest::Approx( outputStateOutData[i] ),
+ "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != "
+ << outputStateOutData[i]);
}
for (size_t i = 0; i < cellStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(cellStateOutValue[i], cellStateOutData[i]),
- "cellStateOut[" << i << "]: " << cellStateOutValue[i] << " != " << cellStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(cellStateOutValue[i] == doctest::Approx( cellStateOutData[i] ),
+ "cellStateOutValue[" << i << "]: " << cellStateOutValue[i] << " != "
+ << cellStateOutData[i]);
}
for (size_t i = 0; i < outputValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i]),
- "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx( outputData[i] ),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
}
}
@@ -669,13 +643,14 @@ void QuantizedLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
// check the results
for (size_t i = 0; i < cellStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(cellStateOutValue[i], cellStateOutData[i], 1.0f),
- "cellStateOut[" << i << "]: " << cellStateOutValue[i] << " != " << cellStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(cellStateOutValue[i] == doctest::Approx( cellStateOutData[i] ),
+ "cellStateOutValue[" << i << "]: " << cellStateOutValue[i] << " != "
+ << cellStateOutData[i]);
}
for (size_t i = 0; i < outputValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i], 1.0f),
- "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx( outputData[i] ),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
}
}
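
doctest::Approx replaces the hand-rolled TolerantCompareEqual helper removed from this file: by default it applies a small relative tolerance to a floating-point comparison, and a looser tolerance can be requested explicitly, which is how the epsilon parameter is used in the new UnidirectionalSequenceLstm tests further down. A minimal sketch of both forms, with illustrative values:

// Sketch: floating-point comparisons with doctest::Approx.
DOCTEST_TEST_CASE("ApproxSketch")
{
    float computed = 0.300001f;
    float expected = 0.3f;

    // Default Approx: small relative tolerance, similar in spirit to the
    // removed TolerantCompareEqual helper.
    DOCTEST_CHECK(computed == doctest::Approx(expected));

    // Explicitly widened relative tolerance (1%).
    DOCTEST_CHECK(computed == doctest::Approx(expected).epsilon(0.01));
}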
diff --git a/test/SystemProperties.cpp b/test/SystemProperties.cpp
index e1a2632e..ef952964 100644
--- a/test/SystemProperties.cpp
+++ b/test/SystemProperties.cpp
@@ -1,57 +1,58 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "DriverTestHelpers.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
-#include "../SystemPropertiesUtils.hpp"
+#include <SystemPropertiesUtils.hpp>
-BOOST_AUTO_TEST_SUITE(SystemProperiesTests)
+DOCTEST_TEST_SUITE("SystemProperiesTests")
+{
-BOOST_AUTO_TEST_CASE(SystemProperties)
+DOCTEST_TEST_CASE("SystemProperties")
{
// Test default value
{
auto p = __system_property_find("thisDoesNotExist");
- BOOST_TEST((p == nullptr));
+ DOCTEST_CHECK((p == nullptr));
int defaultValue = ParseSystemProperty("thisDoesNotExist", -4);
- BOOST_TEST((defaultValue == -4));
+ DOCTEST_CHECK((defaultValue == -4));
}
// Test default value from bad data type
{
__system_property_set("thisIsNotFloat", "notfloat");
float defaultValue = ParseSystemProperty("thisIsNotFloat", 0.1f);
- BOOST_TEST((defaultValue == 0.1f));
+ DOCTEST_CHECK((defaultValue == 0.1f));
}
// Test fetching bool values
{
__system_property_set("myTestBool", "1");
bool b = ParseSystemProperty("myTestBool", false);
- BOOST_TEST((b == true));
+ DOCTEST_CHECK((b == true));
}
{
__system_property_set("myTestBool", "0");
bool b = ParseSystemProperty("myTestBool", true);
- BOOST_TEST((b == false));
+ DOCTEST_CHECK((b == false));
}
// Test fetching int
{
__system_property_set("myTestInt", "567");
int i = ParseSystemProperty("myTestInt", 890);
- BOOST_TEST((i==567));
+ DOCTEST_CHECK((i==567));
}
// Test fetching float
{
__system_property_set("myTestFloat", "1.2f");
float f = ParseSystemProperty("myTestFloat", 3.4f);
- BOOST_TEST((f==1.2f));
+ DOCTEST_CHECK((f==1.2f));
}
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/TestHalfTensor.cpp b/test/TestHalfTensor.cpp
new file mode 100644
index 00000000..12cdc427
--- /dev/null
+++ b/test/TestHalfTensor.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TestHalfTensor.hpp"
+
+namespace driverTestHelpers
+{
+
+hidl_vec<uint32_t> TestHalfTensor::GetDimensions() const
+{
+ hidl_vec<uint32_t> dimensions;
+ dimensions.resize(m_Shape.GetNumDimensions());
+ for (uint32_t i=0; i<m_Shape.GetNumDimensions(); ++i)
+ {
+ dimensions[i] = m_Shape[i];
+ }
+ return dimensions;
+}
+
+unsigned int TestHalfTensor::GetNumElements() const
+{
+ return m_Shape.GetNumElements();
+}
+
+const Half * TestHalfTensor::GetData() const
+{
+ DOCTEST_CHECK(m_Data.empty() == false);
+ return &m_Data[0];
+}
+
+} // namespace driverTestHelpers
diff --git a/test/TestHalfTensor.hpp b/test/TestHalfTensor.hpp
new file mode 100644
index 00000000..2b7870f4
--- /dev/null
+++ b/test/TestHalfTensor.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <ArmnnDriver.hpp>
+#include "DriverTestHelpers.hpp"
+
+#include <half/half.hpp>
+
+using Half = half_float::half;
+
+namespace driverTestHelpers
+{
+
+class TestHalfTensor
+{
+public:
+ TestHalfTensor(const armnn::TensorShape & shape,
+ const std::vector<Half> & data)
+ : m_Shape{shape}
+ , m_Data{data}
+ {
+ DOCTEST_CHECK(m_Shape.GetNumElements() == m_Data.size());
+ }
+
+ hidl_vec<uint32_t> GetDimensions() const;
+ unsigned int GetNumElements() const;
+ const Half * GetData() const;
+
+private:
+ armnn::TensorShape m_Shape;
+ std::vector<Half> m_Data;
+};
+
+} // driverTestHelpers
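
TestHalfTensor mirrors the existing TestTensor helper but carries FP16 data; the constructor checks that the shape's element count matches the data vector, and GetData() refuses to return a pointer into an empty vector. A small construction sketch under those assumptions; the shape and values are made up for illustration.

// Sketch: building a TestHalfTensor with four FP16 values.
#include "TestHalfTensor.hpp"

DOCTEST_TEST_CASE("TestHalfTensorSketch")
{
    armnn::TensorShape shape({1, 2, 2, 1});   // 4 elements in total
    std::vector<Half> data{Half(1.0f), Half(2.0f), Half(3.0f), Half(4.0f)};

    driverTestHelpers::TestHalfTensor tensor(shape, data);

    DOCTEST_CHECK(tensor.GetNumElements() == 4);
    DOCTEST_CHECK(tensor.GetDimensions().size() == 4);
}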
diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp
index e6cb446f..39bcd5a6 100644
--- a/test/TestTensor.cpp
+++ b/test/TestTensor.cpp
@@ -1,7 +1,8 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "TestTensor.hpp"
namespace driverTestHelpers
@@ -25,7 +26,7 @@ unsigned int TestTensor::GetNumElements() const
const float * TestTensor::GetData() const
{
- ARMNN_ASSERT(m_Data.empty() == false);
+ DOCTEST_CHECK(m_Data.empty() == false);
return &m_Data[0];
}
diff --git a/test/TestTensor.hpp b/test/TestTensor.hpp
index 1cd1950d..b0613eb2 100644
--- a/test/TestTensor.hpp
+++ b/test/TestTensor.hpp
@@ -1,12 +1,12 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#pragma once
-#include "../ArmnnDriver.hpp"
+#pragma once
-#include <armnn/utility/Assert.hpp>
+#include <ArmnnDriver.hpp>
+#include "DriverTestHelpers.hpp"
namespace driverTestHelpers
{
@@ -19,7 +19,7 @@ public:
: m_Shape{shape}
, m_Data{data}
{
- ARMNN_ASSERT(m_Shape.GetNumElements() == m_Data.size());
+ DOCTEST_CHECK(m_Shape.GetNumElements() == m_Data.size());
}
hidl_vec<uint32_t> GetDimensions() const;
diff --git a/test/Tests.cpp b/test/Tests.cpp
index 0ef142d9..4628414e 100644
--- a/test/Tests.cpp
+++ b/test/Tests.cpp
@@ -1,31 +1,35 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#define LOG_TAG "ArmnnDriverTests"
-#define BOOST_TEST_MODULE armnn_driver_tests
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
-#include "DriverTestHelpers.hpp"
+#ifndef DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#endif
-BOOST_AUTO_TEST_SUITE(DriverTests)
+#include "DriverTestHelpers.hpp"
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
-BOOST_AUTO_TEST_CASE(Init)
+DOCTEST_TEST_SUITE("DriverTests")
+{
+
+DOCTEST_TEST_CASE("Init")
{
// Making the driver object on the stack causes a weird libc error, so make it on the heap instead
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
V1_0::DeviceStatus status = driver->getStatus();
- // Note double-parentheses to avoid compile error from Boost trying to printf the DeviceStatus
- BOOST_TEST((status == V1_0::DeviceStatus::AVAILABLE));
+ // Note double-parentheses to avoid compile error from doctest trying to printf the DeviceStatus
+ DOCTEST_CHECK((status == V1_0::DeviceStatus::AVAILABLE));
}
-BOOST_AUTO_TEST_CASE(TestCapabilities)
+DOCTEST_TEST_CASE("TestCapabilities")
{
// Making the driver object on the stack causes a weird libc error, so make it on the heap instead
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -41,11 +45,11 @@ BOOST_AUTO_TEST_CASE(TestCapabilities)
driver->getCapabilities(cb);
- BOOST_TEST((int)error == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(cap.float32Performance.execTime > 0.f);
- BOOST_TEST(cap.float32Performance.powerUsage > 0.f);
- BOOST_TEST(cap.quantized8Performance.execTime > 0.f);
- BOOST_TEST(cap.quantized8Performance.powerUsage > 0.f);
+ DOCTEST_CHECK((int)error == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(cap.float32Performance.execTime > 0.f);
+ DOCTEST_CHECK(cap.float32Performance.powerUsage > 0.f);
+ DOCTEST_CHECK(cap.quantized8Performance.execTime > 0.f);
+ DOCTEST_CHECK(cap.quantized8Performance.powerUsage > 0.f);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
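
Tests.cpp now owns the test runner's entry point through DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN, the doctest counterpart of the removed BOOST_TEST_MODULE define; it must appear in exactly one translation unit before the doctest include. If a binary ever needed its own main, for instance to do process-wide setup first, doctest also supports supplying one explicitly. A sketch of that alternative, which is not what this patch does:

// Sketch: an explicit main as an alternative to DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN.
#define DOCTEST_CONFIG_IMPLEMENT
#include <doctest/doctest.h>

int main(int argc, char** argv)
{
    doctest::Context context;
    context.applyCommandLine(argc, argv);

    // Process-wide setup could run here before the test cases execute.
    int result = context.run();

    if (context.shouldExit())   // set when a query flag such as --help was handled
    {
        return result;
    }

    // Further teardown or reporting could run here.
    return result;
}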
diff --git a/test/UnidirectionalSequenceLstm.hpp b/test/UnidirectionalSequenceLstm.hpp
new file mode 100644
index 00000000..75b7a8d4
--- /dev/null
+++ b/test/UnidirectionalSequenceLstm.hpp
@@ -0,0 +1,1419 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DriverTestHelpers.hpp"
+
+#include <armnn/utility/IgnoreUnused.hpp>
+
+#include <array>
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using RequestArgument = V1_0::RequestArgument;
+
+#ifdef ARMNN_ANDROID_S
+#include <nnapi/Types.h>
+#endif
+
+using namespace driverTestHelpers;
+using namespace android::hardware;
+
+namespace
+{
+
+template<typename T>
+RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int poolIndex)
+{
+ V1_0::DataLocation inputInloc = {};
+ inputInloc.poolIndex = poolIndex;
+ inputInloc.offset = 0;
+ inputInloc.length = value.size() * sizeof(T);
+ RequestArgument inputRequestArgument = {};
+ inputRequestArgument.location = inputInloc;
+ inputRequestArgument.dimensions = hidl_vec<uint32_t>{};
+ return inputRequestArgument;
+}
+
+// Helper function to create an OperandLifeTime::NO_VALUE for testing.
+// To be used on optional input operands that have no values - these are valid and should be tested.
+V1_0::OperandLifeTime CreateNoValueLifeTime(const hidl_vec<uint32_t>& dimensions)
+{
+ // Only create a NO_VALUE for optional operands that have no elements
+ if (dimensions.size() == 0 || dimensions[0] == 0)
+ {
+ return V1_0::OperandLifeTime::NO_VALUE;
+ }
+ return V1_0::OperandLifeTime::CONSTANT_COPY;
+}
+
+template<typename HalModel>
+void ExecuteModel(const HalModel& model, armnn_driver::ArmnnDriver& driver, const V1_0::Request& request)
+{
+ android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, driver);
+ if (preparedModel.get() != nullptr)
+ {
+ Execute(preparedModel, request);
+ }
+}
+
+#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3)
+
+template<>
+void ExecuteModel<armnn_driver::hal_1_2::HalPolicy::Model>(const armnn_driver::hal_1_2::HalPolicy::Model& model,
+ armnn_driver::ArmnnDriver& driver,
+ const V1_0::Request& request)
+{
+ android::sp<V1_2::IPreparedModel> preparedModel = PrepareModel_1_2(model, driver);
+ if (preparedModel.get() != nullptr)
+ {
+ Execute(preparedModel, request);
+ }
+}
+
+#endif
+
+} // anonymous namespace
+
+// Add our own tests here since we fail the unidirectional sequence lstm
+// tests which Google supplies (because of non-const weights)
+template <typename HalPolicy>
+void UnidirectionalSequenceLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
+ const std::vector<float>& inputValue,
+ const hidl_vec<uint32_t>& inputToInputWeightsDimensions,
+ const std::vector<float>& inputToInputWeightsValue,
+ const hidl_vec<uint32_t>& inputToForgetWeightsDimensions,
+ const std::vector<float>& inputToForgetWeightsValue,
+ const hidl_vec<uint32_t>& inputToCellWeightsDimensions,
+ const std::vector<float>& inputToCellWeightsValue,
+ const hidl_vec<uint32_t>& inputToOutputWeightsDimensions,
+ const std::vector<float>& inputToOutputWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToInputWeightsDimensions,
+ const std::vector<float>& recurrentToInputWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToForgetWeightsDimensions,
+ const std::vector<float>& recurrentToForgetWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToCellWeightsDimensions,
+ const std::vector<float>& recurrentToCellWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToOutputWeightsDimensions,
+ const std::vector<float>& recurrentToOutputWeightsValue,
+ const hidl_vec<uint32_t>& cellToInputWeightsDimensions,
+ const std::vector<float>& cellToInputWeightsValue,
+ const hidl_vec<uint32_t>& cellToForgetWeightsDimensions,
+ const std::vector<float>& cellToForgetWeightsValue,
+ const hidl_vec<uint32_t>& cellToOutputWeightsDimensions,
+ const std::vector<float>& cellToOutputWeightsValue,
+ const hidl_vec<uint32_t>& inputGateBiasDimensions,
+ const std::vector<float>& inputGateBiasValue,
+ const hidl_vec<uint32_t>& forgetGateBiasDimensions,
+ const std::vector<float>& forgetGateBiasValue,
+ const hidl_vec<uint32_t>& cellBiasDimensions,
+ const std::vector<float>& cellBiasValue,
+ const hidl_vec<uint32_t>& outputGateBiasDimensions,
+ const std::vector<float>& outputGateBiasValue,
+ const hidl_vec<uint32_t>& projectionWeightsDimensions,
+ const std::vector<float>& projectionWeightsValue,
+ const hidl_vec<uint32_t>& projectionBiasDimensions,
+ const std::vector<float>& projectionBiasValue,
+ const hidl_vec<uint32_t>& outputStateInDimensions,
+ const std::vector<float>& outputStateInValue,
+ const hidl_vec<uint32_t>& cellStateInDimensions,
+ const std::vector<float>& cellStateInValue,
+ const hidl_vec<uint32_t>& activationFunctionDimensions,
+ const std::vector<int32_t>& activationFunctionValue,
+ const hidl_vec<uint32_t>& cellClippingThresholdDimensions,
+ const std::vector<float>& cellClippingThresholdValue,
+ const hidl_vec<uint32_t>& projectionClippingThresholdDimensions,
+ const std::vector<float>& projectionClippingThresholdValue,
+ const bool& timeMajorValue,
+ const hidl_vec<uint32_t>& inputLayerNormWeightsDimensions,
+ const std::vector<float>& inputLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& forgetLayerNormWeightsDimensions,
+ const std::vector<float>& forgetLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& cellLayerNormWeightsDimensions,
+ const std::vector<float>& cellLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& outputLayerNormWeightsDimensions,
+ const std::vector<float>& outputLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& outputDimensions,
+ const std::vector<float>& outputValue,
+ const hidl_vec<uint32_t>&, // outputStateOutDimensions,
+ const std::vector<float>&, // outputStateOutValue,
+ const hidl_vec<uint32_t>&, // cellStateOutDimensions,
+ const std::vector<float>&, // cellStateOutValue,
+ armnn::Compute compute,
+ float epsilonValue = 0)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(compute));
+ using Model = typename HalPolicy::Model;
+ Model model = {};
+
+ // Inputs:
+ // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ AddInputOperand<HalPolicy>(model, inputDimensions);
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ AddTensorOperand<HalPolicy>(model,
+ inputToInputWeightsDimensions,
+ inputToInputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(inputToInputWeightsDimensions));
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ AddTensorOperand<HalPolicy>(model, inputToForgetWeightsDimensions, inputToForgetWeightsValue);
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ AddTensorOperand<HalPolicy>(model, inputToCellWeightsDimensions, inputToCellWeightsValue);
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ AddTensorOperand<HalPolicy>(model, inputToOutputWeightsDimensions, inputToOutputWeightsValue);
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ AddTensorOperand<HalPolicy>(model,
+ recurrentToInputWeightsDimensions,
+ recurrentToInputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(recurrentToInputWeightsDimensions));
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ AddTensorOperand<HalPolicy>(model, recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue);
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ AddTensorOperand<HalPolicy>(model, recurrentToCellWeightsDimensions, recurrentToCellWeightsValue);
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ AddTensorOperand<HalPolicy>(model, recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue);
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ cellToInputWeightsDimensions,
+ cellToInputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellToInputWeightsDimensions));
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ cellToForgetWeightsDimensions,
+ cellToForgetWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellToForgetWeightsDimensions));
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ cellToOutputWeightsDimensions,
+ cellToOutputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellToOutputWeightsDimensions));
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ inputGateBiasDimensions,
+ inputGateBiasValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(inputGateBiasDimensions));
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model, forgetGateBiasDimensions, forgetGateBiasValue);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model, cellBiasDimensions, cellBiasValue);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model, outputGateBiasDimensions, outputGateBiasValue);
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ AddTensorOperand<HalPolicy>(model,
+ projectionWeightsDimensions,
+ projectionWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(projectionWeightsDimensions));
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ AddTensorOperand<HalPolicy>(model,
+ projectionBiasDimensions,
+ projectionBiasValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(projectionBiasDimensions));
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ AddInputOperand<HalPolicy>(model, outputStateInDimensions);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ AddInputOperand<HalPolicy>(model, cellStateInDimensions);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ AddTensorOperand<HalPolicy>(model,
+ activationFunctionDimensions,
+ activationFunctionValue,
+ HalPolicy::OperandType::INT32);
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ AddTensorOperand<HalPolicy>(model,
+ cellClippingThresholdDimensions,
+ cellClippingThresholdValue,
+ HalPolicy::OperandType::FLOAT32);
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ AddTensorOperand<HalPolicy>(model,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ HalPolicy::OperandType::FLOAT32);
+
+ // 23: Time-major if true, batch-major if false.
+ AddBoolOperand<HalPolicy>(model, timeMajorValue);
+
+ // Normalization:
+ // 24:The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ AddTensorOperand<HalPolicy>(model,
+ inputLayerNormWeightsDimensions,
+ inputLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(inputLayerNormWeightsDimensions));
+ // 25:The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ AddTensorOperand<HalPolicy>(model,
+ forgetLayerNormWeightsDimensions,
+ forgetLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(forgetLayerNormWeightsDimensions));
+ // 26:The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ AddTensorOperand<HalPolicy>(model,
+ cellLayerNormWeightsDimensions,
+ cellLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellLayerNormWeightsDimensions));
+ // 27:The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ AddTensorOperand<HalPolicy>(model,
+ outputLayerNormWeightsDimensions,
+ outputLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(outputLayerNormWeightsDimensions));
+
+ // Outputs:
+ // 00: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ AddOutputOperand<HalPolicy>(model, outputDimensions);
+ // 01: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ //AddOutputOperand<HalPolicy>(model, hiddenStateOutDimensions);
+ // 02: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ //AddOutputOperand<HalPolicy>(model, cellStateOutDimensions);
+
+ // make the lstm operation
+ model.operations.resize(1);
+ model.operations[0].type = HalPolicy::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM;
+
+ model.operations[0].inputs = hidl_vec<uint32_t> {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
+ model.operations[0].outputs = hidl_vec<uint32_t> {28};
+
+ // define the input values
+ hidl_vec<RequestArgument> inputArguments;
+ inputArguments.resize(3);
+
+ inputArguments[0] = CreateRequestArgument<float>(inputValue, 0);
+ inputArguments[1] = CreateRequestArgument<float>(outputStateInValue, 1);
+ inputArguments[2] = CreateRequestArgument<float>(cellStateInValue, 2);
+
+ // define the expected output values
+ hidl_vec<RequestArgument> outputArguments;
+ outputArguments.resize(1);
+
+ outputArguments[0] = CreateRequestArgument<float>(outputValue, 3);
+
+ V1_0::Request request = {};
+ request.inputs = inputArguments;
+ request.outputs = outputArguments;
+
+ // set the input data
+ AddPoolAndSetData(inputValue.size(), request, inputValue.data());
+ AddPoolAndSetData(outputStateInValue.size(), request, outputStateInValue.data());
+ AddPoolAndSetData(cellStateInValue.size(), request, cellStateInValue.data());
+
+ // add memory for the outputs
+ android::sp<IMemory> outputMemory = AddPoolAndGetData<float>(outputValue.size(), request);
+ float* outputData = static_cast<float*>(static_cast<void*>(outputMemory->getPointer()));
+
+ // make the prepared model and run the execution
+ ExecuteModel(model, *driver, request);
+
+ // check the results
+ if (epsilonValue != 0)
+ {
+ for (size_t i = 0; i < outputValue.size(); ++i)
+ {
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx(outputData[i]).epsilon(epsilonValue),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < outputValue.size(); ++i)
+ {
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx(outputData[i]),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ }
+ }
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerFloat32TestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = outputSize;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{-0.49536117f, -0.0556083915f, -0.102400711f,
+ -0.117484632f, 0.3298470976f, -0.1179017122f,
+ 0.214305695f, 0.42135173085f, 0.003878414626f,
+ -0.348303917f, -0.1881275477f, 0.0343011027f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{0.2415594226f, 0.15400093799f, 0.4566498398f,
+ -0.3810434485f, 0.268383264f, -0.009807467424f,
+ -0.3522925403f, -0.24275735512f, -0.28344226125f,
+ 0.13512269116f, -0.4932442977f, -0.10039821991f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.2504855627f, 0.184490025045f, -0.2480507493f,
+ 0.386399507f, -0.259465157985f, -0.16545993089f,
+ -0.4230232555f, 0.341664791103f, -0.18127849691f,
+ -0.2277662414f, -0.55275535589f, 0.34184026718f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{0.2303854227f, 0.5218806862f, -0.4865379333f,
+ 0.53969591851f, 0.23393625035f, -0.27140527306f,
+ 0.50009280443f, 0.07511717046f, 0.3998299249f,
+ -0.51717478049f, 0.1889653282f, -0.367323637f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{-0.128009796112f, 0.1995525098f, -0.07745539397f, 0.1558421701f,
+ -0.265254765766f, -0.38837709614f, -0.05636804124f, 0.4259087456f,
+ 0.17628988623f, 0.3877420127f, 0.53300309181f, -0.0959980934f,
+ 0.00302857416f, 0.3266998827f, -0.142509296562f, -0.04433270756f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f,
+ -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f,
+ -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f,
+ -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f,
+ -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f,
+ 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.32921677827f, 0.32624614238f, -0.1388191282f,
+ -0.17879831790f, -0.15185534954f, -0.16918526583f,
+ -0.10087361183f, -0.5436913968f, 0.016758225858f,
+ 0.30454617738f, -0.41493862867f, -0.005565764375f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f,
+ -0.08879069983f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{0};
+ std::vector<float> cellToInputWeightsValue;
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{0};
+ std::vector<float> cellToForgetWeightsValue;
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{0};
+ std::vector<float> cellToOutputWeightsValue;
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue(numUnits, 0.0f);
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue(numUnits, 1.0f);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue(numUnits, 0.0f);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue(numUnits, 0.0f);
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{0};
+ std::vector<float> projectionWeightsValue;
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{0};
+ std::vector<float> projectionBiasValue;
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.0f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.0f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24:The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25:The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26:The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27:The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{-0.07149004f, -0.1621171f, -0.17516759f, -0.0232934225f,
+ -0.16810727f, -0.41412935f, -0.5498753f, -0.00803578f,
+ -0.06687349f, 0.204077631f, -0.4276504f, -0.03123213f,
+ -0.12000261f, -0.0941918f, -0.45639035f, -0.02870186f,
+ -0.03429216f, 0.20824050f, -0.6569892f, -0.004152651f,
+ -0.10493034f, 0.14210969f, -0.58347696f, -0.03297536f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerFloat32TimeMajorTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = outputSize;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{timeSize, batchSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{0.27277296781539917f, 0.3813590407371521f, -0.394489049911499f,
+ 0.2782636880874634f, -0.3793870210647583f, -0.018918335437774658f,
+ 0.2724653482437134f, -0.19314253330230713f, -0.2947450876235962f,
+ -0.30253493785858154f, 0.4241350293159485f, -0.22560018301010132f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{-0.2667974531650543f, -0.05505800247192383f, -0.20932340621948242f,
+ -0.14345619082450867f, 0.09666192531585693f, -0.2604355812072754f,
+ -0.2681812047958374f, -0.3314584493637085f, 0.4485899806022644f,
+ -0.23467743396759033f, 0.5072842240333557f, -0.4192768931388855f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.15782442688941956f, -0.027530014514923096f, 0.4789854884147644f,
+ 0.23227906227111816f, 0.28259342908859253f, -0.030095696449279785f,
+ 0.10071521997451782f, -0.08535495400428772f, 0.18563997745513916f,
+ -0.3049069046974182f, -0.478048175573349f, 0.025234103202819824f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{-0.04584759473800659f, -0.2716066539287567f, 0.012970447540283203f,
+ -0.4729190170764923f, -0.37422770261764526f, 0.49352723360061646f,
+ 0.3163864016532898f, -0.436781644821167f, -0.33074596524238586f,
+ -0.32885751128196716f, -0.40959352254867554f, -0.2124689817428589f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{0.23788475990f, -0.24948765337f, 0.50044941902f,
+ 0.14431896805f, -0.115940228137f, -0.717082679f,
+ -0.17208620906f, 0.17850610617f, -0.16702319684f,
+ -0.11384502053f, -0.309785276245f, -0.3316611672f,
+ 0.52380162477f, -0.06839632987f, -0.391478359627f,
+ -0.10756178963f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{0.11383482068f, 0.1676601767f, -0.08550968004f, 0.03399394089f,
+ 0.08042152225f, -0.2133381964f, 0.05182432704f, 0.38161808255f,
+ -0.5018365979f, -0.08043262364f, 0.07894329014f, -0.07547105155f,
+ 0.12047368288f, 0.2986997961f, 0.0485043078f, -0.13372567296f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{0.0433832928545f, 0.07587072294f, -0.120520234107f, 0.604576051f,
+ -0.434353142986f, 0.009314475068f, 0.005085289478f, 0.08488202038f,
+ -0.00025437487886f, 0.15245915082f, -0.1936587542f, 0.004754020f,
+ -0.1582719236f, 0.3307867646f, 0.0236605107784f, 0.307716339826f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.079031050201f, 0.041414566286f, -0.583727357285f,
+ 0.1025384515f, -0.172372072937f, 0.09214124082f,
+ 0.178184121827f, -0.2439443916f, 0.104485116899f,
+ 0.2600405514f, 0.064414866268f, 0.24141204357f,
+ 0.281875759363f, -0.14234502664f, 0.15126448862f,
+ -0.24421440064f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{0};
+ std::vector<float> cellToInputWeightsValue;
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{0};
+ std::vector<float> cellToForgetWeightsValue;
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{0};
+ std::vector<float> cellToOutputWeightsValue;
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue(numUnits, 0.0f);
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue(numUnits, 1.0f);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue(numUnits, 0.0f);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue(numUnits, 0.0f);
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{0};
+ std::vector<float> projectionWeightsValue;
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{0};
+ std::vector<float> projectionBiasValue;
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.0f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.0f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = true;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size]. If batch-major: [batch_size, max_time, output_size].
+ hidl_vec<uint32_t> outputDimensions{timeSize, batchSize, outputSize};
+ std::vector<float> outputValue{0.135657698f, 0.124672532f, 0.0212090332f, -0.0530203655f,
+ 0.106138252f, 0.0404792242f, 0.0151643595f, -0.00675163185f,
+ -0.0128514022f, 0.0644884035f, 0.0709072053f, -0.0454045124f,
+ 0.16288602f, 0.16649379f, 0.02770456f, -0.03698075f,
+ 0.11171641f, 0.043119f, 0.0762981f, -0.01228541f,
+ 0.10439701f, 0.21439962f, 0.11919238f, -0.08390583f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 2;
+ uint32_t timeSize = 3;
+ uint32_t inputSize = 4;
+ uint32_t outputSize = 5;
+ uint32_t numUnits = 6;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.,
+ 1., 2., 3., 4., 5., 4.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{0.021393683f, 0.06124551f, 0.046905167f, -0.014657677f,
+ -0.03149463f, 0.09171803f, 0.14647801f, 0.10797193f,
+ -0.0057968358f, 0.0019193048f, -0.2726754f, 0.10154029f,
+ -0.018539885f, 0.080349885f, -0.10262385f, -0.022599787f,
+ -0.09121155f, -0.008675967f, -0.045206103f, -0.0821282f,
+ -0.008045952f, 0.015478081f, 0.055217247f, 0.038719587f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{-0.0018401089f, -0.004852237f, 0.03698424f, 0.014181704f,
+ 0.028273236f, -0.016726194f, -0.05249759f, -0.10204261f,
+ 0.00861066f, -0.040979505f, -0.009899187f, 0.01923892f,
+ -0.028177269f, -0.08535103f, -0.14585495f, 0.10662567f,
+ -0.01909731f, -0.017883534f, -0.0047269356f, -0.045103323f,
+ 0.0030784295f, 0.076784775f, 0.07463696f, 0.094531395f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.04580283f, -0.09549462f, -0.032418985f, -0.06454633f,
+ -0.043528453f, 0.043018587f, -0.049152344f, -0.12418144f,
+ -0.078985475f, -0.07596889f, 0.019484362f, -0.11434962f,
+ -0.0074034138f, -0.06314844f, -0.092981495f, 0.0062155537f,
+ -0.025034338f, -0.0028890965f, 0.048929527f, 0.06235075f,
+ 0.10665918f, -0.032036792f, -0.08505916f, -0.10843358f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{-0.0998932f, -0.07201956f, -0.052803773f, -0.15629593f,
+ -0.15001918f, -0.07650751f, 0.02359855f, -0.075155355f,
+ -0.08037709f, -0.15093534f, 0.029517552f, -0.04751393f,
+ 0.010350531f, -0.02664851f, -0.016839722f, -0.023121163f,
+ 0.0077019283f, 0.012851257f, -0.05040649f, -0.0129761f,
+ -0.021737747f, -0.038305793f, -0.06870586f, -0.01481247f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{-0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f,
+ -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f,
+ -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f,
+ -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f,
+ 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f,
+ 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f,
+ -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f,
+ 0.14283475f, -0.07390571f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f,
+ 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f,
+ -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f,
+ 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f,
+ 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f,
+ -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f,
+ -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f,
+ 0.061878487f, -0.04729229f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f,
+ 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f,
+ 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f,
+ -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f,
+ 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f,
+ 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f,
+ -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f,
+ -0.019443132f, -0.030755889f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{0.025825322f, -0.05813119f, 0.09495884f,
+ -0.045984812f, -0.01255415f, -0.0026479573f,
+ -0.08196161f, -0.054914974f, -0.0046604523f,
+ -0.029587349f, -0.044576716f, -0.07480124f,
+ -0.082868785f, 0.023254942f, 0.027502948f,
+ -0.0039728214f, -0.08683098f, -0.08116779f,
+ -0.014675607f, -0.037924774f, -0.023314456f,
+ -0.007401714f, -0.09255757f, 0.029460307f,
+ -0.08829125f, -0.005139627f, -0.08989442f,
+ -0.0555066f, 0.13596267f, 0.025062224f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{numUnits};
+ std::vector<float> cellToInputWeightsValue{0.040369894f, 0.030746894f, 0.24704495f,
+ 0.018586371f, -0.037586458f, -0.15312155f};
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{numUnits};
+ std::vector<float> cellToForgetWeightsValue{-0.01998659f, -0.15568835f, -0.24248174f,
+ -0.012770197f, 0.041331276f, -0.072311886f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{numUnits};
+ std::vector<float> cellToOutputWeightsValue{0.08286371f, -0.08261836f, -0.51210177f,
+ 0.002913762f, 0.17764764f, -0.5495371f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue{0.02234832f, 0.14757581f, 0.18176508f,
+ 0.10380666f, 0.053110216f, -0.06928846f};
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue{0.035185695f, -0.042891346f, -0.03032477f,
+ 0.23027696f, 0.11098921f, 0.08989442f};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue{-0.024379363f, 0.0055531194f, 0.23377132f,
+ 0.033463873f, -0.1483596f, 0.029460307f};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue{0.046159424f, -0.0012809046f, 0.03563469f,
+ 0.12648113f, 0.027195795f, 0.35373217f};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{numUnits, outputSize};
+ std::vector<float> projectionWeightsValue{-0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f,
+ 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f,
+ -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f,
+ -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f,
+ 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f,
+ 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f};
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{outputSize};
+ std::vector<float> projectionBiasValue(outputSize, 0.f);
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size]. If batch-major: [batch_size, max_time, output_size].
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{-0.0135612f, -0.0263441f, 0.0314008f, -0.00883455f, 0.00763052f,
+ -0.00126877f, -0.0292959f, 0.0449957f, -0.00976195f, -0.00492338f,
+ -0.0175702f, -0.0431753f, 0.0597117f, -0.0169154f, 0.0142087f,
+ 0.00472515f, -0.0196355f, 0.0342524f, -0.00407936f, -0.0253189f,
+ -0.00512944f, -0.0293754f, 0.0512771f, -0.0151874f, -0.0246433f,
+ -0.00744986f, -0.0345103f, 0.0450666f, -0.00944991f, 0.0127171f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute, 0.0031454);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = 5;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{-0.49536117f, -0.0556083915f, -0.102400711f,
+ -0.117484632f, 0.3298470976f, -0.1179017122f,
+ 0.214305695f, 0.42135173085f, 0.003878414626f,
+ -0.348303917f, -0.1881275477f, 0.0343011027f,
+ -0.38837709614f, -0.05636804124f, 0.4259087456f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{0.2415594226f, 0.15400093799f, 0.4566498398f,
+ -0.3810434485f, 0.268383264f, -0.009807467424f,
+ -0.3522925403f, -0.24275735512f, -0.28344226125f,
+ 0.13512269116f, -0.4932442977f, -0.10039821991f,
+ 0.2726137042f, 0.09216640889f, -0.06551410215f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.2504855627f, 0.184490025045f, -0.2480507493f,
+ 0.386399507f, -0.259465157985f, -0.16545993089f,
+ -0.4230232555f, 0.341664791103f, -0.18127849691f,
+ -0.2277662414f, -0.55275535589f, 0.34184026718f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{0.2303854227f, 0.5218806862f, -0.4865379333f,
+ 0.53969591851f, 0.23393625035f, -0.27140527306f,
+ 0.50009280443f, 0.07511717046f, 0.3998299249f,
+ -0.51717478049f, 0.1889653282f, -0.367323637f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{-0.128009796112f, 0.1995525098f, -0.07745539397f, 0.1558421701f,
+ -0.265254765766f, -0.38837709614f, -0.05636804124f, 0.4259087456f,
+ 0.17628988623f, 0.3877420127f, 0.53300309181f, -0.0959980934f,
+ 0.00302857416f, 0.3266998827f, -0.142509296562f, -0.04433270756f,
+ 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f,
+ -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f,
+ -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f,
+ -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f,
+ 0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f,
+ -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f,
+ 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f,
+ 0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.32921677827f, 0.32624614238f, -0.1388191282f,
+ -0.17879831790f, -0.15185534954f, -0.16918526583f,
+ -0.10087361183f, -0.5436913968f, 0.016758225858f,
+ 0.30454617738f, -0.41493862867f, -0.005565764375f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f,
+ -0.08879069983f, 0.11178309f, 0.09481031f,
+ -0.26424935f, 0.46261835f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{numUnits};
+ std::vector<float> cellToInputWeightsValue{0.05f, 0.1f, 0.25f, 0.15f, -0.02f};
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{numUnits};
+ std::vector<float> cellToForgetWeightsValue{-0.02f, -0.15f, -0.25f, -0.03f, 0.15f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{numUnits};
+ std::vector<float> cellToOutputWeightsValue{0.1f, -0.1f, -0.5f, 0.05f, 0.01f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue{0.03f, 0.15f, 0.22f, 0.38f, 0.05f};
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue{0.1f, -0.3f, -0.2f, 0.1f, 0.4f};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue{-0.05f, 0.72f, 0.25f, 0.08f, 0.1f};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue{0.05f, -0.01f, 0.2f, 0.1f, -0.2f};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{numUnits, outputSize};
+ std::vector<float> projectionWeightsValue{-0.1f, 0.2f, 0.01f, -0.2f,
+ 0.1f, 0.5f, 0.3f, 0.08f,
+ 0.07f, 0.2f, -0.4f, 0.2f,
+ 0.5f, -0.4f, 0.3f, -0.2f,
+ 0.3f, 0.08f, -0.07f, 0.2f};
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{outputSize};
+ std::vector<float> projectionBiasValue(outputSize, 0.f);
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{numUnits};
+ std::vector<float> inputLayerNormWeightsValue{0.1f, 0.2f, 0.3f, 0.5f, 0.8f};
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{numUnits};
+ std::vector<float> forgetLayerNormWeightsValue{0.1f, 0.2f, 0.3f, 0.5f, 0.2f};
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{numUnits};
+ std::vector<float> cellLayerNormWeightsValue{0.7f, 0.2f, 0.3f, 0.8f, 0.5f};
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{numUnits};
+ std::vector<float> outputLayerNormWeightsValue{0.6f, 0.2f, 0.2f, 0.5f, 0.1f};
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size]. If batch-major: [batch_size, max_time, output_size].
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{0.0642256f, 0.0343966f, 0.184122f, 0.114717f,
+ 0.11458f, 0.0407109f, 0.300327f, 0.174301f,
+ 0.0864761f, 0.0362912f, 0.178635f, 0.115689f,
+ 0.108008f, 0.0386623f, 0.273471f, 0.167115f,
+ 0.0859545f, 0.0331481f, 0.186051f, 0.11888f,
+ 0.106649f, 0.0276847f, 0.229863f, 0.166958f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = outputSize;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{0};
+ std::vector<float> inputToInputWeightsValue;
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{0.2415594226f, 0.15400093799f, 0.4566498398f,
+ -0.3810434485f, 0.268383264f, -0.009807467424f,
+ -0.3522925403f, -0.24275735512f, -0.28344226125f,
+ 0.13512269116f, -0.4932442977f, -0.10039821991f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.2504855627f, 0.184490025045f, -0.2480507493f,
+ 0.386399507f, -0.259465157985f, -0.16545993089f,
+ -0.4230232555f, 0.341664791103f, -0.18127849691f,
+ -0.2277662414f, -0.55275535589f, 0.34184026718f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{0.2303854227f, 0.5218806862f, -0.4865379333f,
+ 0.53969591851f, 0.23393625035f, -0.27140527306f,
+ 0.50009280443f, 0.07511717046f, 0.3998299249f,
+ -0.51717478049f, 0.1889653282f, -0.367323637f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{0};
+ std::vector<float> recurrentToInputWeightsValue;
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f,
+ -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f,
+ -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f,
+ -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f,
+ -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f,
+ 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.32921677827f, 0.32624614238f, -0.1388191282f,
+ -0.17879831790f, -0.15185534954f, -0.16918526583f,
+ -0.10087361183f, -0.5436913968f, 0.016758225858f,
+ 0.30454617738f, -0.41493862867f, -0.005565764375f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f,
+ -0.08879069983f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{0};
+ std::vector<float> cellToInputWeightsValue;
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{numUnits};
+ std::vector<float> cellToForgetWeightsValue{0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{numUnits};
+ std::vector<float> cellToOutputWeightsValue{-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{0};
+ std::vector<float> inputGateBiasValue;
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue{1., 1., 1., 1.};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue{0., 0., 0., 0.};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue{0., 0., 0., 0.};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{0};
+ std::vector<float> projectionWeightsValue;
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{0};
+ std::vector<float> projectionBiasValue;
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size]. If batch-major: [batch_size, max_time, output_size].
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{-0.0129257f, -0.070531f, -0.153508f, -0.0392391f,
+ -0.0300169f, -0.195717f, -0.528679f, -0.0818106f,
+ -0.0332748f, 0.155429f, -0.353966f, -0.0801505f,
+ -0.032312f, -0.0407911f, -0.435053f, -0.0932317f,
+ -0.0108233f, 0.165584f, -0.640424f, -0.0447535f,
+ -0.031675f, 0.125987f, -0.526695f, -0.110093f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
\ No newline at end of file
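Note: the templated helpers added above are implementations only. Each one takes the target backend as an argument, and every optional LSTM operand (CIFG input weights, peephole weights, projection, layer normalisation) is marked as absent by passing dimensions {0} with an empty value vector. A minimal, hypothetical sketch of how one of these helpers could be driven from a per-HAL doctest file follows; the include path, suite name, hal_1_2::HalPolicy alias and CpuRef backend choice are illustrative assumptions, not a copy of the actual per-HAL test sources.

    // Hypothetical doctest driver for the templated test implementation above.
    // The HAL policy namespace, backend and suite/case names are assumptions.
    #include "UnidirectionalSequenceLstm.hpp"   // path is illustrative
    #include <doctest/doctest.h>

    DOCTEST_TEST_SUITE("UnidirectionalSequenceLstmTests_CpuRef")
    {

    DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTest")
    {
        // Exercise the no-CIFG, peephole + projection variant on the reference backend.
        UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTestImpl<hal_1_2::HalPolicy>(
            armnn::Compute::CpuRef);
    }

    }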
diff --git a/test/UtilsTests.cpp b/test/UtilsTests.cpp
index de84bb49..68d7b501 100644
--- a/test/UtilsTests.cpp
+++ b/test/UtilsTests.cpp
@@ -1,23 +1,18 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "DriverTestHelpers.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
-#include "../Utils.hpp"
#include <armnn/src/armnn/OptimizedNetworkImpl.hpp>
#include <fstream>
-#include <iomanip>
#include <memory>
#include <armnn/INetwork.hpp>
-#include "armnn/NetworkFwd.hpp"
-
-#include <Filesystem.hpp>
+#include <armnnUtils/Filesystem.hpp>
using namespace android;
using namespace android::nn;
@@ -50,7 +45,7 @@ public:
return stream.good() ? ::armnn::Status::Success : ::armnn::Status::Failure;
}
- ::armnn::profiling::ProfilingGuid GetGuid() const final { return ::armnn::profiling::ProfilingGuid(0); }
+ ::arm::pipe::ProfilingGuid GetGuid() const final { return ::arm::pipe::ProfilingGuid(0); }
void UpdateMockSerializedContent(const std::string& mockSerializedContent)
{
@@ -64,7 +59,6 @@ private:
} // armnn namespace
-BOOST_AUTO_TEST_SUITE(UtilsTests)
// The following are helpers for writing unit tests for the driver.
namespace
@@ -78,10 +72,9 @@ public:
ExportNetworkGraphFixture()
: ExportNetworkGraphFixture("/data")
{}
+
ExportNetworkGraphFixture(const std::string& requestInputsAndOutputsDumpDir)
- : m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
- , m_FileName()
- , m_FileStream()
+ : m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir), m_FileName(), m_FileStream()
{
// Set the name of the output .dot file.
// NOTE: the export now uses a time stamp to name the file so we
@@ -97,7 +90,7 @@ public:
m_FileStream.close();
// Ignore any error (such as file not found).
- (void)remove(m_FileName.c_str());
+ (void) remove(m_FileName.c_str());
}
bool FileExists()
@@ -147,10 +140,12 @@ private:
};
-
} // namespace
-BOOST_AUTO_TEST_CASE(ExportToEmptyDirectory)
+DOCTEST_TEST_SUITE("UtilsTests")
+{
+
+DOCTEST_TEST_CASE("ExportToEmptyDirectory")
{
// Set the fixture for this test.
ExportNetworkGraphFixture fixture("");
@@ -167,13 +162,13 @@ BOOST_AUTO_TEST_CASE(ExportToEmptyDirectory)
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file does not exist.
- BOOST_TEST(!fixture.FileExists());
+ DOCTEST_CHECK(!fixture.FileExists());
}
-BOOST_AUTO_TEST_CASE(ExportNetwork)
+DOCTEST_TEST_CASE("ExportNetwork")
{
// Set the fixture for this test.
ExportNetworkGraphFixture fixture;
@@ -191,16 +186,16 @@ BOOST_AUTO_TEST_CASE(ExportNetwork)
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture.FileExists());
+ DOCTEST_CHECK(fixture.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture.GetFileContent() == mockSerializedContent);
}
-BOOST_AUTO_TEST_CASE(ExportNetworkOverwriteFile)
+DOCTEST_TEST_CASE("ExportNetworkOverwriteFile")
{
// Set the fixture for this test.
ExportNetworkGraphFixture fixture;
@@ -217,13 +212,13 @@ BOOST_AUTO_TEST_CASE(ExportNetworkOverwriteFile)
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture.FileExists());
+ DOCTEST_CHECK(fixture.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture.GetFileContent() == mockSerializedContent);
// Update the mock serialized content of the network.
mockSerializedContent = "This is ANOTHER mock serialized content!";
@@ -235,16 +230,16 @@ BOOST_AUTO_TEST_CASE(ExportNetworkOverwriteFile)
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork2,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file still exists and that it has the correct name.
- BOOST_TEST(fixture.FileExists());
+ DOCTEST_CHECK(fixture.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture.GetFileContent() == mockSerializedContent);
}
-BOOST_AUTO_TEST_CASE(ExportMultipleNetworks)
+DOCTEST_TEST_CASE("ExportMultipleNetworks")
{
// Set the fixtures for this test.
ExportNetworkGraphFixture fixture1;
@@ -263,32 +258,32 @@ BOOST_AUTO_TEST_CASE(ExportMultipleNetworks)
// Export the mock optimized network.
fixture1.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture1.m_RequestInputsAndOutputsDumpDir);
+ fixture1.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture1.FileExists());
+ DOCTEST_CHECK(fixture1.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture1.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture1.GetFileContent() == mockSerializedContent);
// Export the mock optimized network.
fixture2.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture2.m_RequestInputsAndOutputsDumpDir);
+ fixture2.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture2.FileExists());
+ DOCTEST_CHECK(fixture2.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture2.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture2.GetFileContent() == mockSerializedContent);
// Export the mock optimized network.
fixture3.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture3.m_RequestInputsAndOutputsDumpDir);
+ fixture3.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture3.FileExists());
+ DOCTEST_CHECK(fixture3.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture3.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture3.GetFileContent() == mockSerializedContent);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
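The UtilsTests.cpp hunks above follow the mechanical Boost.Test-to-doctest migration applied across the test suite: BOOST_AUTO_TEST_SUITE(...)/BOOST_AUTO_TEST_SUITE_END() become a braced DOCTEST_TEST_SUITE("...") block, BOOST_AUTO_TEST_CASE(Name) becomes DOCTEST_TEST_CASE("Name"), and BOOST_TEST(expr) becomes DOCTEST_CHECK(expr). A minimal sketch of the resulting structure is shown below, assuming doctest is pulled in via its usual single header; the case name and checked values are illustrative only.

    // Minimal doctest layout mirroring the converted UtilsTests.cpp (illustrative only).
    #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN   // lets this sketch build standalone
    #include <doctest/doctest.h>
    #include <string>

    DOCTEST_TEST_SUITE("UtilsTests")
    {

    DOCTEST_TEST_CASE("ExampleCheck")
    {
        const std::string expected = "This is a mock serialized content!";
        const std::string actual   = "This is a mock serialized content!";
        DOCTEST_CHECK(actual == expected);   // replaces BOOST_TEST(actual == expected)
    }

    } // closes the suite block; replaces BOOST_AUTO_TEST_SUITE_END()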