From ce3e84a8d449cbf31cee57e30f0eef6a96c0ce94 Mon Sep 17 00:00:00 2001 From: telsoa01 Date: Fri, 31 Aug 2018 09:31:35 +0100 Subject: Release 18.08 --- 1.0/ArmnnDriver.hpp | 66 + 1.0/ArmnnDriverImpl.cpp | 277 ++++ 1.0/ArmnnDriverImpl.hpp | 41 + 1.1/ArmnnDriver.hpp | 103 ++ 1.1/ArmnnDriverImpl.cpp | 151 +++ 1.1/ArmnnDriverImpl.hpp | 39 + Android.bp | 29 +- Android.mk | 261 +++- ArmnnDevice.cpp | 65 + ArmnnDevice.hpp | 27 + ArmnnDriver.cpp | 453 ------- ArmnnDriver.hpp | 75 +- ArmnnPreparedModel.cpp | 113 +- ArmnnPreparedModel.hpp | 51 +- DriverOptions.cpp | 159 +++ DriverOptions.hpp | 43 + ModelToINetworkConverter.cpp | 1003 ++++++++++---- ModelToINetworkConverter.hpp | 129 +- NnapiSupport.txt | 19 +- README.md | 30 +- RequestThread.hpp | 5 +- SystemPropertiesUtils.hpp | 3 +- Utils.cpp | 45 +- Utils.hpp | 24 +- ...id.hardware.neuralnetworks@1.1-service-armnn.rc | 4 + service.cpp | 1 - test/Android.mk | 166 ++- test/Concurrent.cpp | 6 +- test/Convolution2D.cpp | 10 +- test/DriverTestHelpers.cpp | 26 +- test/DriverTestHelpers.hpp | 23 +- test/FullyConnected.cpp | 16 +- test/GenericLayerTests.cpp | 222 ++-- test/Lstm.cpp | 1397 ++++++++++++++++++++ test/Merger.cpp | 160 ++- test/Tests.cpp | 8 +- test/UtilsTests.cpp | 11 +- 37 files changed, 4045 insertions(+), 1216 deletions(-) create mode 100644 1.0/ArmnnDriver.hpp create mode 100644 1.0/ArmnnDriverImpl.cpp create mode 100644 1.0/ArmnnDriverImpl.hpp create mode 100644 1.1/ArmnnDriver.hpp create mode 100644 1.1/ArmnnDriverImpl.cpp create mode 100644 1.1/ArmnnDriverImpl.hpp create mode 100644 ArmnnDevice.cpp create mode 100644 ArmnnDevice.hpp delete mode 100644 ArmnnDriver.cpp create mode 100644 DriverOptions.cpp create mode 100644 DriverOptions.hpp create mode 100644 android.hardware.neuralnetworks@1.1-service-armnn.rc create mode 100644 test/Lstm.cpp diff --git a/1.0/ArmnnDriver.hpp b/1.0/ArmnnDriver.hpp new file mode 100644 index 00000000..83484ca9 --- /dev/null +++ b/1.0/ArmnnDriver.hpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include + +#include "ArmnnDriverImpl.hpp" +#include "ArmnnDevice.hpp" + +#include + +namespace armnn_driver +{ +namespace V1_0 +{ + +class ArmnnDriver : public ArmnnDevice, public ::android::hardware::neuralnetworks::V1_0::IDevice +{ +public: + ArmnnDriver(DriverOptions options) + : ArmnnDevice(std::move(options)) + { + ALOGV("V1_0::ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} + +public: + Return getCapabilities( + ::android::hardware::neuralnetworks::V1_0::IDevice::getCapabilities_cb cb) + { + ALOGV("V1_0::ArmnnDriver::getCapabilities()"); + + return ArmnnDriverImpl::getCapabilities(m_Runtime, cb); + } + + Return getSupportedOperations( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + ::android::hardware::neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb) + { + ALOGV("V1_0::ArmnnDriver::getSupportedOperations()"); + + return ArmnnDriverImpl::getSupportedOperations(m_Runtime, m_Options, model, cb); + } + + Return prepareModel( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const android::sp& cb) + { + ALOGV("V1_0::ArmnnDriver::prepareModel()"); + + return ArmnnDriverImpl::prepareModel(m_Runtime, m_ClTunedParameters, m_Options, model, cb); + } + + Return getStatus() + { + ALOGV("V1_0::ArmnnDriver::getStatus()"); + + return ArmnnDriverImpl::getStatus(); + } +}; + +} // armnn_driver::namespace V1_0 +} // namespace armnn_driver diff --git a/1.0/ArmnnDriverImpl.cpp b/1.0/ArmnnDriverImpl.cpp new file mode 100644 index 00000000..5429ebed --- /dev/null +++ b/1.0/ArmnnDriverImpl.cpp @@ -0,0 +1,277 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ArmnnDriverImpl.hpp" +#include "ModelToINetworkConverter.hpp" +#include "ArmnnPreparedModel.hpp" +#include "SystemPropertiesUtils.hpp" + +#if defined(ARMNN_ANDROID_P) +// The headers of the ML framework have changed between Android O and Android P. +// The validation functions have been moved into their own header, ValidateHal.h. 
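+// (ARMNN_ANDROID_P is defined by Android.mk: unconditionally for the @1.1 build,
+// and for the @1.0 build whenever PLATFORM_VERSION is 9, so this include is only
+// pulled in when building against the Android P ML framework.)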
+#include +#endif + +#include + +using namespace std; +using namespace android; +using namespace android::nn; +using namespace android::hardware; + +namespace +{ + +const char *g_Float32PerformanceExecTimeName = "ArmNN.float32Performance.execTime"; +const char *g_Float32PerformancePowerUsageName = "ArmNN.float32Performance.powerUsage"; +const char *g_Quantized8PerformanceExecTimeName = "ArmNN.quantized8Performance.execTime"; +const char *g_Quantized8PerformancePowerUsageName = "ArmNN.quantized8Performance.powerUsage"; + +void NotifyCallbackAndCheck(const sp& callback, + ErrorStatus errorStatus, + const sp& preparedModelPtr) +{ + Return returned = callback->notify(errorStatus, preparedModelPtr); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("V1_0::ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ", + returned.description().c_str()); + } +} + +Return FailPrepareModel(ErrorStatus error, + const string& message, + const sp& callback) +{ + ALOGW("V1_0::ArmnnDriverImpl::prepareModel: %s", message.c_str()); + NotifyCallbackAndCheck(callback, error, nullptr); + return error; +} + +} // namespace + +namespace armnn_driver +{ +namespace V1_0 +{ + +Return ArmnnDriverImpl::getCapabilities( + const armnn::IRuntimePtr& runtime, + neuralnetworks::V1_0::IDevice::getCapabilities_cb cb) +{ + ALOGV("V1_0::ArmnnDriverImpl::getCapabilities()"); + + neuralnetworks::V1_0::Capabilities capabilities; + if (runtime) + { + capabilities.float32Performance.execTime = + ParseSystemProperty(g_Float32PerformanceExecTimeName, .1f); + + capabilities.float32Performance.powerUsage = + ParseSystemProperty(g_Float32PerformancePowerUsageName, .1f); + + capabilities.quantized8Performance.execTime = + ParseSystemProperty(g_Quantized8PerformanceExecTimeName, .1f); + + capabilities.quantized8Performance.powerUsage = + ParseSystemProperty(g_Quantized8PerformancePowerUsageName, .1f); + + cb(ErrorStatus::NONE, capabilities); + } + else + { + capabilities.float32Performance.execTime = 0; + capabilities.float32Performance.powerUsage = 0; + capabilities.quantized8Performance.execTime = 0; + capabilities.quantized8Performance.powerUsage = 0; + + cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities); + } + + return Void(); +} + +Return ArmnnDriverImpl::getSupportedOperations( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const neuralnetworks::V1_0::Model& model, + neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb) +{ + ALOGV("V1_0::ArmnnDriverImpl::getSupportedOperations()"); + + vector result; + + if (!runtime) + { + cb(ErrorStatus::DEVICE_UNAVAILABLE, result); + return Void(); + } + + // Run general model validation, if this doesn't pass we shouldn't analyse the model anyway + if (!android::nn::validateModel(model)) + { + cb(ErrorStatus::INVALID_ARGUMENT, result); + return Void(); + } + + // Attempt to convert the model to an ArmNN input network (INetwork). 
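+// A conversion result of UnsupportedFeature is still acceptable at this point:
+// the per-operation support flags are reported to the callback below, so only a
+// hard conversion failure is treated as an error.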
+ ModelToINetworkConverter modelConverter(options.GetComputeDevice(), model, + options.GetForcedUnsupportedOperations()); + + if (modelConverter.GetConversionResult() != ConversionResult::Success + && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature) + { + cb(ErrorStatus::GENERAL_FAILURE, result); + return Void(); + } + + // Check each operation if it was converted successfully and copy the flags + // into the result (vector) that we need to return to Android + result.reserve(model.operations.size()); + for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); operationIdx++) + { + bool operationSupported = modelConverter.IsOperationSupported(operationIdx); + result.push_back(operationSupported); + } + + cb(ErrorStatus::NONE, result); + return Void(); +} + +Return ArmnnDriverImpl::prepareModel( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const neuralnetworks::V1_0::Model& model, + const sp& cb, + bool float32ToFloat16) +{ + ALOGV("V1_0::ArmnnDriverImpl::prepareModel()"); + + if (cb.get() == nullptr) + { + ALOGW("V1_0::ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel"); + return ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, + "V1_0::ArmnnDriverImpl::prepareModel: Device unavailable", cb); + } + + if (!android::nn::validateModel(model)) + { + return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, + "V1_0::ArmnnDriverImpl::prepareModel: Invalid model passed as input", cb); + } + + // Deliberately ignore any unsupported operations requested by the options - + // at this point we're being asked to prepare a model that we've already declared support for + // and the operation indices may be different to those in getSupportedOperations anyway. + set unsupportedOperations; + ModelToINetworkConverter modelConverter(options.GetComputeDevice(), model, + unsupportedOperations); + + if (modelConverter.GetConversionResult() != ConversionResult::Success) + { + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); + return ErrorStatus::NONE; + } + + // optimize the network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + armnn::OptimizerOptions OptOptions; + OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16; + + try + { + optNet = armnn::Optimize(*modelConverter.GetINetwork(), + {options.GetComputeDevice()}, + runtime->GetDeviceSpec(), + OptOptions); + } + catch (armnn::Exception &e) + { + stringstream message; + message << "armnn::Exception (" << e.what() << ") caught from optimize."; + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return ErrorStatus::NONE; + } + + // Check that the optimized network is valid. + if (!optNet) + { + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, + "V1_0::ArmnnDriverImpl::prepareModel: Invalid optimized network", cb); + return ErrorStatus::NONE; + } + + // Export the optimized network graph to a dot file if an output dump directory + // has been specified in the drivers' arguments. 
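+// (The directory is the one given via the -d/--request-inputs-and-outputs-dump-dir
+// driver option; the export is presumably skipped when no directory has been set.)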
+ ExportNetworkGraphToDotFile(*optNet, + options.GetRequestInputsAndOutputsDumpDir(), + model); + + // load it into the runtime + armnn::NetworkId netId = 0; + try + { + if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success) + { + return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, + "V1_0::ArmnnDriverImpl::prepareModel: Network could not be loaded", cb); + } + } + catch (armnn::Exception& e) + { + stringstream message; + message << "armnn::Exception (" << e.what()<< ") caught from LoadNetwork."; + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb); + return ErrorStatus::NONE; + } + + unique_ptr preparedModel(new ArmnnPreparedModel( + netId, + runtime.get(), + model, + options.GetRequestInputsAndOutputsDumpDir(), + options.IsGpuProfilingEnabled() + )); + + // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if + // this is enabled) before the first 'real' inference which removes the overhead of the first inference. + preparedModel->ExecuteWithDummyInputs(); + + if (clTunedParameters && + options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters) + { + // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. + try + { + clTunedParameters->Save(options.GetClTunedParametersFile().c_str()); + } + catch (const armnn::Exception& error) + { + ALOGE("V1_0::ArmnnDriverImpl: Failed to save CL tuned parameters file '%s': %s", + options.GetClTunedParametersFile().c_str(), error.what()); + } + } + + NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release()); + + return ErrorStatus::NONE; +} + +Return ArmnnDriverImpl::getStatus() +{ + ALOGV("V1_0::ArmnnDriverImpl::getStatus()"); + + return DeviceStatus::AVAILABLE; +} + +} // armnn_driver::namespace V1_0 +} // namespace armnn_driver diff --git a/1.0/ArmnnDriverImpl.hpp b/1.0/ArmnnDriverImpl.hpp new file mode 100644 index 00000000..2628682d --- /dev/null +++ b/1.0/ArmnnDriverImpl.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +#include "DriverOptions.hpp" + +#include + +namespace armnn_driver +{ +namespace V1_0 +{ + +class ArmnnDriverImpl +{ +public: + static Return getCapabilities( + const armnn::IRuntimePtr& runtime, + ::android::hardware::neuralnetworks::V1_0::IDevice::getCapabilities_cb cb); + static Return getSupportedOperations( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_0::Model& model, + ::android::hardware::neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb); + static Return prepareModel( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const android::sp& cb, + bool float32ToFloat16 = false); + static Return getStatus(); +}; + +} // namespace armnn_driver::V1_0 +} // namespace armnn_driver diff --git a/1.1/ArmnnDriver.hpp b/1.1/ArmnnDriver.hpp new file mode 100644 index 00000000..6bd8e03c --- /dev/null +++ b/1.1/ArmnnDriver.hpp @@ -0,0 +1,103 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include + +#include "ArmnnDevice.hpp" +#include "1.0/ArmnnDriverImpl.hpp" +#include "1.1/ArmnnDriverImpl.hpp" + +#include + +namespace armnn_driver { +namespace V1_1 { + +class ArmnnDriver : public ArmnnDevice, public ::android::hardware::neuralnetworks::V1_1::IDevice +{ +public: + ArmnnDriver(DriverOptions options) + : ArmnnDevice(std::move(options)) + { + ALOGV("V1_1::ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} + +public: + Return getCapabilities( + ::android::hardware::neuralnetworks::V1_0::IDevice::getCapabilities_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getCapabilities()"); + + return V1_0::ArmnnDriverImpl::getCapabilities(m_Runtime, cb); + } + + Return getSupportedOperations( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + ::android::hardware::neuralnetworks::V1_0::IDevice::getSupportedOperations_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getSupportedOperations()"); + + return V1_0::ArmnnDriverImpl::getSupportedOperations(m_Runtime, m_Options, model, cb); + } + + Return prepareModel( + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const android::sp& cb) + { + ALOGV("V1_1::ArmnnDriver::prepareModel()"); + + return V1_0::ArmnnDriverImpl::prepareModel(m_Runtime, m_ClTunedParameters, m_Options, model, cb); + } + + Return getCapabilities_1_1( + ::android::hardware::neuralnetworks::V1_1::IDevice::getCapabilities_1_1_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getCapabilities_1_1()"); + + return V1_1::ArmnnDriverImpl::getCapabilities_1_1(m_Runtime, cb); + } + + Return getSupportedOperations_1_1( + const ::android::hardware::neuralnetworks::V1_1::Model& model, + ::android::hardware::neuralnetworks::V1_1::IDevice::getSupportedOperations_1_1_cb cb) + { + ALOGV("V1_1::ArmnnDriver::getSupportedOperations_1_1()"); + + return V1_1::ArmnnDriverImpl::getSupportedOperations_1_1(m_Runtime, m_Options, model, cb); + } + + Return prepareModel_1_1( + const ::android::hardware::neuralnetworks::V1_1::Model& model, + ::android::hardware::neuralnetworks::V1_1::ExecutionPreference preference, + const android::sp& cb) + { + using namespace ::android::hardware::neuralnetworks::V1_0; + + ALOGV("V1_1::ArmnnDriver::prepareModel_1_1()"); + + if(!(preference == ExecutionPreference::LOW_POWER || + preference == ExecutionPreference::FAST_SINGLE_ANSWER || + preference == ExecutionPreference::SUSTAINED_SPEED)) + { + ALOGV("V1_1::ArmnnDriver::prepareModel_1_1(): Invalid execution preference"); + cb->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); + return ErrorStatus::INVALID_ARGUMENT; + } + + return V1_1::ArmnnDriverImpl::prepareModel_1_1(m_Runtime, m_ClTunedParameters, m_Options, model, cb); + } + + Return getStatus() + { + ALOGV("V1_1::ArmnnDriver::getStatus()"); + + return V1_0::ArmnnDriverImpl::getStatus(); + } +}; + +} // armnn_driver::namespace V1_1 +} // namespace armnn_driver diff --git a/1.1/ArmnnDriverImpl.cpp b/1.1/ArmnnDriverImpl.cpp new file mode 100644 index 00000000..a5e32766 --- /dev/null +++ b/1.1/ArmnnDriverImpl.cpp @@ -0,0 +1,151 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ArmnnDriverImpl.hpp" +#include "../1.0/ArmnnDriverImpl.hpp" + +#include + +#include +#include + +#include + +using namespace std; +using namespace android; +using namespace android::nn; +using namespace android::hardware; + +namespace +{ + +void NotifyCallbackAndCheck(const sp& callback, + ErrorStatus errorStatus, + const sp& preparedModelPtr) +{ + Return returned = callback->notify(errorStatus, preparedModelPtr); + // This check is required, if the callback fails and it isn't checked it will bring down the service + if (!returned.isOk()) + { + ALOGE("V1_1::ArmnnDriverImpl::prepareModel_1_1: hidl callback failed to return properly: %s ", + returned.description().c_str()); + } +} + +Return FailPrepareModel(ErrorStatus error, + const string& message, + const sp& callback) +{ + ALOGW("V1_1::ArmnnDriverImpl::prepareModel_1_1: %s", message.c_str()); + NotifyCallbackAndCheck(callback, error, nullptr); + return error; +} + +} // namespace + +namespace armnn_driver +{ +namespace V1_1 +{ + +Return ArmnnDriverImpl::getCapabilities_1_1( + const armnn::IRuntimePtr& runtime, + neuralnetworks::V1_1::IDevice::getCapabilities_1_1_cb cb) +{ + ALOGV("V1_1::ArmnnDriverImpl::getCapabilities_1_1()"); + + neuralnetworks::V1_0::IDevice::getCapabilities_cb cb_1_0 = + [&](ErrorStatus status, const neuralnetworks::V1_0::Capabilities& capabilities) + { + BOOST_ASSERT_MSG(compliantWithV1_1(capabilities), + "V1_1::ArmnnDriverImpl: V1_0::Capabilities not compliant with V1_1::Capabilities"); + + cb(status, convertToV1_1(capabilities)); + }; + + V1_0::ArmnnDriverImpl::getCapabilities(runtime, cb_1_0); + + return Void(); +} + +Return ArmnnDriverImpl::getSupportedOperations_1_1( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const neuralnetworks::V1_1::Model& model, + neuralnetworks::V1_1::IDevice::getSupportedOperations_1_1_cb cb) +{ + ALOGV("V1_1::ArmnnDriverImpl::getSupportedOperations_1_1()"); + + if(compliantWithV1_0(model)) + { + V1_0::ArmnnDriverImpl::getSupportedOperations(runtime, options, convertToV1_0(model), cb); + } + else + { + std::vector result; + + if (!runtime) + { + ALOGW("V1_1::ArmnnDriverImpl::getSupportedOperations_1_1: Device unavailable"); + cb(ErrorStatus::DEVICE_UNAVAILABLE, result); + return Void(); + } + + if (!android::nn::validateModel(model)) + { + ALOGW("V1_1::ArmnnDriverImpl::getSupportedOperations_1_1: Invalid model passed as input"); + cb(ErrorStatus::INVALID_ARGUMENT, result); + return Void(); + } + + result.assign(model.operations.size(), false); + cb(ErrorStatus::NONE, result); + } + + return Void(); +} + +Return ArmnnDriverImpl::prepareModel_1_1( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const neuralnetworks::V1_1::Model& model, + const sp& cb) +{ + ALOGV("V1_1::ArmnnDriverImpl::prepareModel_1_1()"); + + if(compliantWithV1_0(model)) + { + return V1_0::ArmnnDriverImpl::prepareModel(runtime, clTunedParameters, options, convertToV1_0(model), cb, + model.relaxComputationFloat32toFloat16 && options.GetFp16Enabled()); + } + else + { + if (cb.get() == nullptr) + { + ALOGW("V1_1::ArmnnDriverImpl::prepareModel_1_1: Invalid callback passed to prepareModel"); + return ErrorStatus::INVALID_ARGUMENT; + } + + if (!runtime) + { + return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, + "V1_1::ArmnnDriverImpl::prepareModel_1_1: Device unavailable", cb); + } + + if (!android::nn::validateModel(model)) + { + return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, + 
"V1_1::ArmnnDriverImpl::prepareModel_1_1: Invalid model passed as input", cb); + } + + FailPrepareModel(ErrorStatus::GENERAL_FAILURE, + "V1_1::ArmnnDriverImpl::prepareModel_1_1: Unsupported model", cb); + return ErrorStatus::NONE; + } +} + +} // armnn_driver::namespace V1_1 +} // namespace armnn_driver diff --git a/1.1/ArmnnDriverImpl.hpp b/1.1/ArmnnDriverImpl.hpp new file mode 100644 index 00000000..307d96bf --- /dev/null +++ b/1.1/ArmnnDriverImpl.hpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +#include "DriverOptions.hpp" + +#include + +namespace armnn_driver +{ +namespace V1_1 +{ + +class ArmnnDriverImpl +{ +public: + static Return getCapabilities_1_1( + const armnn::IRuntimePtr& runtime, + ::android::hardware::neuralnetworks::V1_1::IDevice::getCapabilities_1_1_cb cb); + static Return getSupportedOperations_1_1( + const armnn::IRuntimePtr& runtime, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_1::Model& model, + ::android::hardware::neuralnetworks::V1_1::IDevice::getSupportedOperations_1_1_cb cb); + static Return prepareModel_1_1( + const armnn::IRuntimePtr& runtime, + const armnn::IGpuAccTunedParametersPtr& clTunedParameters, + const DriverOptions& options, + const ::android::hardware::neuralnetworks::V1_1::Model& model, + const android::sp& cb); +}; + +} // namespace armnn_driver::V1_1 +} // namespace armnn_driver diff --git a/Android.bp b/Android.bp index 03b2ded4..e44275fa 100644 --- a/Android.bp +++ b/Android.bp @@ -37,6 +37,7 @@ cc_library_static { "clframework/src/core/CL/kernels/CLActivationLayerKernel.cpp", "clframework/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp", "clframework/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp", + "clframework/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp", "clframework/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp", "clframework/src/core/CL/kernels/CLBitwiseAndKernel.cpp", "clframework/src/core/CL/kernels/CLBitwiseNotKernel.cpp", @@ -46,8 +47,10 @@ cc_library_static { "clframework/src/core/CL/kernels/CLCannyEdgeKernel.cpp", "clframework/src/core/CL/kernels/CLChannelCombineKernel.cpp", "clframework/src/core/CL/kernels/CLChannelExtractKernel.cpp", + "clframework/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp", "clframework/src/core/CL/kernels/CLCol2ImKernel.cpp", "clframework/src/core/CL/kernels/CLColorConvertKernel.cpp", + "clframework/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp", "clframework/src/core/CL/kernels/CLConvolutionKernel.cpp", "clframework/src/core/CL/kernels/CLCopyKernel.cpp", "clframework/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp", @@ -66,6 +69,7 @@ cc_library_static { "clframework/src/core/CL/kernels/CLErodeKernel.cpp", "clframework/src/core/CL/kernels/CLFastCornersKernel.cpp", "clframework/src/core/CL/kernels/CLFillBorderKernel.cpp", + "clframework/src/core/CL/kernels/CLFlattenLayerKernel.cpp", "clframework/src/core/CL/kernels/CLFloorKernel.cpp", "clframework/src/core/CL/kernels/CLGaussian3x3Kernel.cpp", "clframework/src/core/CL/kernels/CLGaussian5x5Kernel.cpp", @@ -118,6 +122,7 @@ cc_library_static { "clframework/src/core/CL/kernels/CLWarpAffineKernel.cpp", "clframework/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp", "clframework/src/core/CL/kernels/CLWeightsReshapeKernel.cpp", + "clframework/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp", 
"clframework/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp", "clframework/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp", "clframework/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp", @@ -144,6 +149,11 @@ cc_library_static { "clframework/src/core/NEON/kernels/arm_gemm/gemm_int8.cpp", "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", "clframework/src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", + "clframework/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.cpp", + "clframework/src/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.cpp", "clframework/src/core/NEON/kernels/convolution/common/utils.cpp", "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_1x1_fp32_fp32.cpp", "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_2x2_fp32_fp32.cpp", @@ -178,6 +188,7 @@ cc_library_static { "clframework/src/core/NEON/kernels/NEChannelExtractKernel.cpp", "clframework/src/core/NEON/kernels/NECol2ImKernel.cpp", "clframework/src/core/NEON/kernels/NEColorConvertKernel.cpp", + "clframework/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp", "clframework/src/core/NEON/kernels/NEConvolutionKernel.cpp", "clframework/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp", "clframework/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp", @@ -247,6 +258,7 @@ cc_library_static { "clframework/src/core/NEON/kernels/NETransposeKernel.cpp", "clframework/src/core/NEON/kernels/NEWarpKernel.cpp", "clframework/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp", + "clframework/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp", "clframework/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp", "clframework/src/core/PyramidInfo.cpp", "clframework/src/core/Rounding.cpp", @@ -282,6 +294,7 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLAccumulate.cpp", "clframework/src/runtime/CL/functions/CLActivationLayer.cpp", "clframework/src/runtime/CL/functions/CLArithmeticAddition.cpp", + "clframework/src/runtime/CL/functions/CLArithmeticDivision.cpp", "clframework/src/runtime/CL/functions/CLArithmeticSubtraction.cpp", "clframework/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp", "clframework/src/runtime/CL/functions/CLBitwiseAnd.cpp", @@ -292,7 +305,10 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLCannyEdge.cpp", "clframework/src/runtime/CL/functions/CLChannelCombine.cpp", "clframework/src/runtime/CL/functions/CLChannelExtract.cpp", + "clframework/src/runtime/CL/functions/CLChannelShuffleLayer.cpp", "clframework/src/runtime/CL/functions/CLColorConvert.cpp", + "clframework/src/runtime/CL/functions/CLConcatenateLayer.cpp", + "clframework/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp", "clframework/src/runtime/CL/functions/CLConvolution.cpp", "clframework/src/runtime/CL/functions/CLConvolutionLayer.cpp", "clframework/src/runtime/CL/functions/CLCopy.cpp", @@ -333,6 +349,7 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLLaplacianPyramid.cpp", "clframework/src/runtime/CL/functions/CLLaplacianReconstruct.cpp", "clframework/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp", + "clframework/src/runtime/CL/functions/CLLSTMLayer.cpp", 
"clframework/src/runtime/CL/functions/CLMagnitude.cpp", "clframework/src/runtime/CL/functions/CLMeanStdDev.cpp", "clframework/src/runtime/CL/functions/CLMedian3x3.cpp", @@ -362,10 +379,12 @@ cc_library_static { "clframework/src/runtime/CL/functions/CLTranspose.cpp", "clframework/src/runtime/CL/functions/CLWarpAffine.cpp", "clframework/src/runtime/CL/functions/CLWarpPerspective.cpp", + "clframework/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp", "clframework/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp", "clframework/src/runtime/CL/functions/CLWinogradInputTransform.cpp", "clframework/src/runtime/CL/ICLSimpleFunction.cpp", "clframework/src/runtime/CL/tuners/BifrostTuner.cpp", + "clframework/src/runtime/CL/tuners/MidgardTuner.cpp", "clframework/src/runtime/CPP/CPPScheduler.cpp", "clframework/src/runtime/CPP/functions/CPPPermute.cpp", "clframework/src/runtime/CPP/functions/CPPUpsample.cpp", @@ -384,6 +403,7 @@ cc_library_static { "clframework/src/runtime/MemoryManagerOnDemand.cpp", "clframework/src/runtime/MultiHOG.cpp", "clframework/src/runtime/MultiImage.cpp", + "clframework/src/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.cpp", "clframework/src/runtime/NEON/functions/NEAbsoluteDifference.cpp", "clframework/src/runtime/NEON/functions/NEAccumulate.cpp", "clframework/src/runtime/NEON/functions/NEActivationLayer.cpp", @@ -400,6 +420,8 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NEChannelExtract.cpp", "clframework/src/runtime/NEON/functions/NECol2Im.cpp", "clframework/src/runtime/NEON/functions/NEColorConvert.cpp", + "clframework/src/runtime/NEON/functions/NEConcatenateLayer.cpp", + "clframework/src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp", "clframework/src/runtime/NEON/functions/NEConvolution.cpp", "clframework/src/runtime/NEON/functions/NEConvolutionLayer.cpp", "clframework/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp", @@ -421,6 +443,7 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NEGaussian3x3.cpp", "clframework/src/runtime/NEON/functions/NEGaussian5x5.cpp", "clframework/src/runtime/NEON/functions/NEGaussianPyramid.cpp", + "clframework/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp", "clframework/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp", "clframework/src/runtime/NEON/functions/NEGEMM.cpp", "clframework/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp", @@ -457,8 +480,10 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NERemap.cpp", "clframework/src/runtime/NEON/functions/NEReshapeLayer.cpp", "clframework/src/runtime/NEON/functions/NEROIPoolingLayer.cpp", + "clframework/src/runtime/NEON/functions/NERNNLayer.cpp", "clframework/src/runtime/NEON/functions/NEScale.cpp", "clframework/src/runtime/NEON/functions/NEScharr3x3.cpp", + "clframework/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp", "clframework/src/runtime/NEON/functions/NESobel3x3.cpp", "clframework/src/runtime/NEON/functions/NESobel5x5.cpp", "clframework/src/runtime/NEON/functions/NESobel7x7.cpp", @@ -468,11 +493,11 @@ cc_library_static { "clframework/src/runtime/NEON/functions/NETranspose.cpp", "clframework/src/runtime/NEON/functions/NEWarpAffine.cpp", "clframework/src/runtime/NEON/functions/NEWarpPerspective.cpp", + "clframework/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp", "clframework/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", "clframework/src/runtime/NEON/INESimpleFunction.cpp", "clframework/src/runtime/OffsetLifetimeManager.cpp", 
"clframework/src/runtime/OffsetMemoryPool.cpp", - "clframework/src/runtime/OMP/OMPScheduler.cpp", "clframework/src/runtime/PoolManager.cpp", "clframework/src/runtime/Pyramid.cpp", "clframework/src/runtime/Scheduler.cpp", @@ -480,6 +505,7 @@ cc_library_static { "clframework/src/runtime/TensorAllocator.cpp", "clframework/src/runtime/Tensor.cpp", "clframework/src/runtime/Utils.cpp", + "clframework/utils/CommonGraphOptions.cpp", "clframework/utils/GraphUtils.cpp", "clframework/utils/Utils.cpp", ], @@ -518,6 +544,7 @@ cc_library_static { "-fexceptions", "-DEMBEDDED_KERNELS", "-DARM_COMPUTE_ASSERTS_ENABLED", + "-DARM_COMPUTE_CPP_SCHEDULER", "-Wno-unused-parameter", "-DNO_DOT_IN_TOOLCHAIN", "-no-integrated-as" diff --git a/Android.mk b/Android.mk index e69514c2..c0dd1e6d 100644 --- a/Android.mk +++ b/Android.mk @@ -12,12 +12,12 @@ ARMNN_UTILS_HEADER_PATH := $(LOCAL_PATH)/armnn/src/armnnUtils OPENCL_HEADER_PATH := $(LOCAL_PATH)/clframework/include NN_HEADER_PATH := $(LOCAL_PATH)/../../../frameworks/ml/nn/runtime/include -################### -# libarmnn-driver # -################### +####################### +# libarmnn-driver@1.0 # +####################### include $(CLEAR_VARS) -LOCAL_MODULE := libarmnn-driver +LOCAL_MODULE := libarmnn-driver@1.0 LOCAL_MODULE_TAGS := eng optional LOCAL_ARM_MODE := arm LOCAL_PROPRIETARY_MODULE := true @@ -25,16 +25,16 @@ LOCAL_PROPRIETARY_MODULE := true LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk LOCAL_C_INCLUDES := \ - $(ARMNN_HEADER_PATH) \ - $(ARMNN_UTILS_HEADER_PATH) \ - $(OPENCL_HEADER_PATH) \ - $(NN_HEADER_PATH) + $(ARMNN_HEADER_PATH) \ + $(ARMNN_UTILS_HEADER_PATH) \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) LOCAL_CFLAGS := \ - -std=c++14 \ - -fexceptions \ - -Werror \ - -Wno-format-security + -std=c++14 \ + -fexceptions \ + -Werror \ + -Wno-format-security ifeq ($(PLATFORM_VERSION),9) # Required to build with the changes made to the Android ML framework starting from Android P, # regardless of the HAL version used for the build. @@ -42,43 +42,110 @@ LOCAL_CFLAGS+= \ -DARMNN_ANDROID_P endif ifeq ($(ARMNN_DRIVER_DEBUG),1) - LOCAL_CFLAGS+= -UNDEBUG +LOCAL_CFLAGS+= \ + -UNDEBUG endif LOCAL_SRC_FILES := \ - ArmnnDriver.cpp \ - ArmnnPreparedModel.cpp \ - ModelToINetworkConverter.cpp \ - RequestThread.cpp \ - Utils.cpp + 1.0/ArmnnDriverImpl.cpp \ + DriverOptions.cpp \ + ArmnnDevice.cpp \ + ArmnnPreparedModel.cpp \ + ModelToINetworkConverter.cpp \ + RequestThread.cpp \ + Utils.cpp LOCAL_STATIC_LIBRARIES := \ - libneuralnetworks_common \ - libarmnn \ - libboost_log \ - libboost_program_options \ - libboost_system \ - libboost_thread \ - armnn-arm_compute + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute LOCAL_SHARED_LIBRARIES := \ - libbase \ - libhidlbase \ - libhidltransport \ - libhidlmemory \ - liblog \ - libutils \ - android.hardware.neuralnetworks@1.0 \ - android.hidl.allocator@1.0 \ - android.hidl.memory@1.0 \ - libOpenCL + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL ifeq ($(PLATFORM_VERSION),9) # Required to build the 1.0 version of the NN Driver on Android P and later versions, # as the 1.0 version of the NN API needs the 1.1 HAL headers to be included regardless. 
LOCAL_SHARED_LIBRARIES+= \ - android.hardware.neuralnetworks@1.1 + android.hardware.neuralnetworks@1.1 +endif + +include $(BUILD_STATIC_LIBRARY) + +####################### +# libarmnn-driver@1.1 # +####################### +include $(CLEAR_VARS) + +LOCAL_MODULE := libarmnn-driver@1.1 +LOCAL_MODULE_TAGS := eng optional +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_UTILS_HEADER_PATH) \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) + +LOCAL_CFLAGS := \ + -std=c++14 \ + -fexceptions \ + -Werror \ + -Wno-format-security \ + -DARMNN_ANDROID_P \ + -DARMNN_ANDROID_NN_V1_1 +ifeq ($(ARMNN_DRIVER_DEBUG),1) +LOCAL_CFLAGS+= \ + -UNDEBUG endif +LOCAL_SRC_FILES := \ + 1.0/ArmnnDriverImpl.cpp \ + 1.1/ArmnnDriverImpl.cpp \ + DriverOptions.cpp \ + ArmnnDevice.cpp \ + ArmnnPreparedModel.cpp \ + ModelToINetworkConverter.cpp \ + RequestThread.cpp \ + Utils.cpp + +LOCAL_STATIC_LIBRARIES := \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL + include $(BUILD_STATIC_LIBRARY) ##################################################### @@ -95,56 +162,110 @@ LOCAL_PROPRIETARY_MODULE := true # Mark source files as dependent on Android.mk LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk -LOCAL_C_INCLUDES := \ - $(ARMNN_HEADER_PATH) \ - $(NN_HEADER_PATH) +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(NN_HEADER_PATH) LOCAL_CFLAGS := \ - -std=c++14 \ - -fexceptions + -std=c++14 \ + -fexceptions ifeq ($(ARMNN_DRIVER_DEBUG),1) - LOCAL_CFLAGS+= -UNDEBUG +LOCAL_CFLAGS += \ + -UNDEBUG endif LOCAL_SRC_FILES := \ - service.cpp + service.cpp LOCAL_STATIC_LIBRARIES := \ - libarmnn-driver \ - libneuralnetworks_common \ - libarmnn \ - libboost_log \ - libboost_program_options \ - libboost_system \ - libboost_thread \ - armnn-arm_compute -ifeq ($(PLATFORM_VERSION),9) -# Required to build the 1.0 version of the NN Driver on Android P and later versions. -LOCAL_STATIC_LIBRARIES+= \ - libomp -endif + libarmnn-driver@1.0 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute LOCAL_SHARED_LIBRARIES := \ - libbase \ - libhidlbase \ - libhidltransport \ - libhidlmemory \ - libdl \ - libhardware \ - liblog \ - libtextclassifier_hash \ - libutils \ - android.hardware.neuralnetworks@1.0 \ - android.hidl.allocator@1.0 \ - android.hidl.memory@1.0 \ - libOpenCL + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + libdl \ + libhardware \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL ifeq ($(PLATFORM_VERSION),9) # Required to build the 1.0 version of the NN Driver on Android P and later versions, # as the 1.0 version of the NN API needs the 1.1 HAL headers to be included regardless. 
LOCAL_SHARED_LIBRARIES+= \ - android.hardware.neuralnetworks@1.1 + android.hardware.neuralnetworks@1.1 +endif + +include $(BUILD_EXECUTABLE) + +##################################################### +# android.hardware.neuralnetworks@1.1-service-armnn # +##################################################### +include $(CLEAR_VARS) + +LOCAL_MODULE := android.hardware.neuralnetworks@1.1-service-armnn +LOCAL_INIT_RC := android.hardware.neuralnetworks@1.1-service-armnn.rc +LOCAL_MODULE_TAGS := eng optional +LOCAL_ARM_MODE := arm +LOCAL_MODULE_RELATIVE_PATH := hw +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(ARMNN_HEADER_PATH) \ + $(NN_HEADER_PATH) + +LOCAL_CFLAGS := \ + -std=c++14 \ + -fexceptions \ + -DARMNN_ANDROID_NN_V1_1 +ifeq ($(ARMNN_DRIVER_DEBUG),1) +LOCAL_CFLAGS += \ + -UNDEBUG endif +LOCAL_SRC_FILES := \ + service.cpp + +LOCAL_STATIC_LIBRARIES := \ + libarmnn-driver@1.1 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_program_options \ + libboost_system \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + libdl \ + libhardware \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL + include $(BUILD_EXECUTABLE) ########################## diff --git a/ArmnnDevice.cpp b/ArmnnDevice.cpp new file mode 100644 index 00000000..3e0b0da2 --- /dev/null +++ b/ArmnnDevice.cpp @@ -0,0 +1,65 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ArmnnDevice.hpp" + +#include + +#include + +#include + +using namespace android; + +namespace armnn_driver +{ + +ArmnnDevice::ArmnnDevice(DriverOptions options) + : m_Runtime(nullptr, nullptr) + , m_ClTunedParameters(nullptr) + , m_Options(std::move(options)) +{ + ALOGV("ArmnnDevice::ArmnnDevice()"); + + armnn::ConfigureLogging(false, m_Options.IsVerboseLoggingEnabled(), armnn::LogSeverity::Trace); + if (m_Options.IsVerboseLoggingEnabled()) + { + SetMinimumLogSeverity(base::VERBOSE); + } + else + { + SetMinimumLogSeverity(base::INFO); + } + + try + { + armnn::IRuntime::CreationOptions options; + if (!m_Options.GetClTunedParametersFile().empty()) + { + m_ClTunedParameters = armnn::IGpuAccTunedParameters::Create(m_Options.GetClTunedParametersMode()); + try + { + m_ClTunedParameters->Load(m_Options.GetClTunedParametersFile().c_str()); + } + catch (const armnn::Exception& error) + { + // This is only a warning because the file won't exist the first time you are generating it. + ALOGW("ArmnnDevice: Failed to load CL tuned parameters file '%s': %s", + m_Options.GetClTunedParametersFile().c_str(), error.what()); + } + options.m_GpuAccTunedParameters = m_ClTunedParameters; + } + + options.m_EnableGpuProfiling = m_Options.IsGpuProfilingEnabled(); + + m_Runtime = armnn::IRuntime::Create(options); + } + catch (const armnn::ClRuntimeUnavailableException& error) + { + ALOGE("ArmnnDevice: Failed to setup CL runtime: %s. Device will be unavailable.", error.what()); + } +} + +} // namespace armnn_driver diff --git a/ArmnnDevice.hpp b/ArmnnDevice.hpp new file mode 100644 index 00000000..83414d54 --- /dev/null +++ b/ArmnnDevice.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "DriverOptions.hpp" + +#include + +namespace armnn_driver +{ + +class ArmnnDevice +{ +protected: + ArmnnDevice(DriverOptions options); + virtual ~ArmnnDevice() {} + +protected: + armnn::IRuntimePtr m_Runtime; + armnn::IGpuAccTunedParametersPtr m_ClTunedParameters; + DriverOptions m_Options; +}; + +} // namespace armnn_driver diff --git a/ArmnnDriver.cpp b/ArmnnDriver.cpp deleted file mode 100644 index 4d58249e..00000000 --- a/ArmnnDriver.cpp +++ /dev/null @@ -1,453 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// See LICENSE file in the project root for full license information. -// - -#define LOG_TAG "ArmnnDriver" - -#include "ArmnnDriver.hpp" -#include "ArmnnPreparedModel.hpp" -#include "ModelToINetworkConverter.hpp" -#include "Utils.hpp" - -#include -#include "SystemPropertiesUtils.hpp" - -#include "OperationsUtils.h" - -#if defined(ARMNN_ANDROID_P) -// The headers of the ML framework have changed between Android O and Android P. -// The validation functions have been moved into their own header, ValidateHal.h. -#include -#endif - -#include -#include - -#include -#include -#include -#include - -using namespace android; -using namespace std; - -namespace -{ - -const char *g_Float32PerformanceExecTimeName = "ArmNN.float32Performance.execTime"; -const char *g_Float32PerformancePowerUsageName = "ArmNN.float32Performance.powerUsage"; -const char *g_Quantized8PerformanceExecTimeName = "ArmNN.quantized8Performance.execTime"; -const char *g_Quantized8PerformancePowerUsageName = "ArmNN.quantized8Performance.powerUsage"; - -}; //namespace - -namespace armnn_driver -{ - -DriverOptions::DriverOptions(armnn::Compute computeDevice) -: m_ComputeDevice(computeDevice) -, m_VerboseLogging(false) -, m_UseAndroidNnCpuExecutor(false) -, m_ClTunedParametersMode(armnn::IClTunedParameters::Mode::UseTunedParameters) -{ -} - -DriverOptions::DriverOptions(int argc, char** argv) -: m_ComputeDevice(armnn::Compute::GpuAcc) -, m_VerboseLogging(false) -, m_UseAndroidNnCpuExecutor(false) -, m_ClTunedParametersMode(armnn::IClTunedParameters::Mode::UseTunedParameters) -{ - namespace po = boost::program_options; - - std::string computeDeviceAsString; - std::string unsupportedOperationsAsString; - std::string clTunedParametersModeAsString; - - po::options_description optionsDesc("Options"); - optionsDesc.add_options() - ("compute,c", - po::value(&computeDeviceAsString)->default_value("GpuAcc"), - "Which device to run layers on by default. Possible values are: CpuRef, CpuAcc, GpuAcc") - - ("verbose-logging,v", - po::bool_switch(&m_VerboseLogging), - "Turns verbose logging on") - - ("use-androidnn-cpu-executor,e", - po::bool_switch(&m_UseAndroidNnCpuExecutor), - "Forces the driver to satisfy requests via the Android-provided CpuExecutor") - - ("request-inputs-and-outputs-dump-dir,d", - po::value(&m_RequestInputsAndOutputsDumpDir)->default_value(""), - "If non-empty, the directory where request inputs and outputs should be dumped") - - ("unsupported-operations,u", - po::value(&unsupportedOperationsAsString)->default_value(""), - "If non-empty, a comma-separated list of operation indices which the driver will forcibly " - "consider unsupported") - - ("cl-tuned-parameters-file,t", - po::value(&m_ClTunedParametersFile)->default_value(""), - "If non-empty, the given file will be used to load/save CL tuned parameters. 
" - "See also --cl-tuned-parameters-mode") - - ("cl-tuned-parameters-mode,m", - po::value(&clTunedParametersModeAsString)->default_value("UseTunedParameters"), - "If 'UseTunedParameters' (the default), will read CL tuned parameters from the file specified by " - "--cl-tuned-parameters-file. " - "If 'UpdateTunedParameters', will also find the optimum parameters when preparing new networks and update " - "the file accordingly."); - - - po::variables_map variablesMap; - try - { - po::store(po::parse_command_line(argc, argv, optionsDesc), variablesMap); - po::notify(variablesMap); - } - catch (const po::error& e) - { - ALOGW("An error occurred attempting to parse program options: %s", e.what()); - } - - if (computeDeviceAsString == "CpuRef") - { - m_ComputeDevice = armnn::Compute::CpuRef; - } - else if (computeDeviceAsString == "GpuAcc") - { - m_ComputeDevice = armnn::Compute::GpuAcc; - } - else if (computeDeviceAsString == "CpuAcc") - { - m_ComputeDevice = armnn::Compute::CpuAcc; - } - else - { - ALOGW("Requested unknown compute device %s. Defaulting to compute id %s", - computeDeviceAsString.c_str(), GetComputeDeviceAsCString(m_ComputeDevice)); - } - - if (!unsupportedOperationsAsString.empty()) - { - std::istringstream argStream(unsupportedOperationsAsString); - - std::string s; - while (!argStream.eof()) - { - std::getline(argStream, s, ','); - try - { - unsigned int operationIdx = std::stoi(s); - m_ForcedUnsupportedOperations.insert(operationIdx); - } - catch (const std::invalid_argument&) - { - ALOGW("Ignoring invalid integer argument in -u/--unsupported-operations value: %s", s.c_str()); - } - } - } - - if (!m_ClTunedParametersFile.empty()) - { - // The mode is only relevant if the file path has been provided - if (clTunedParametersModeAsString == "UseTunedParameters") - { - m_ClTunedParametersMode = armnn::IClTunedParameters::Mode::UseTunedParameters; - } - else if (clTunedParametersModeAsString == "UpdateTunedParameters") - { - m_ClTunedParametersMode = armnn::IClTunedParameters::Mode::UpdateTunedParameters; - } - else - { - ALOGW("Requested unknown cl-tuned-parameters-mode '%s'. Defaulting to UseTunedParameters", - clTunedParametersModeAsString.c_str()); - } - } -} - -ArmnnDriver::ArmnnDriver(DriverOptions options) - : m_Runtime(nullptr, nullptr) - , m_ClTunedParameters(nullptr, nullptr) - , m_Options(std::move(options)) -{ - ALOGV("ArmnnDriver::ArmnnDriver()"); - - armnn::ConfigureLogging(false, m_Options.IsVerboseLoggingEnabled(), armnn::LogSeverity::Trace); - if (m_Options.IsVerboseLoggingEnabled()) - { - SetMinimumLogSeverity(base::VERBOSE); - } - else - { - SetMinimumLogSeverity(base::INFO); - } - - try - { - armnn::IRuntime::CreationOptions options(m_Options.GetComputeDevice()); - options.m_UseCpuRefAsFallback = false; - if (!m_Options.GetClTunedParametersFile().empty()) - { - m_ClTunedParameters = armnn::IClTunedParameters::Create(m_Options.GetClTunedParametersMode()); - try - { - m_ClTunedParameters->Load(m_Options.GetClTunedParametersFile().c_str()); - } - catch (const armnn::Exception& error) - { - // This is only a warning because the file won't exist the first time you are generating it. - ALOGW("ArmnnDriver: Failed to load CL tuned parameters file '%s': %s", - m_Options.GetClTunedParametersFile().c_str(), error.what()); - } - options.m_ClTunedParameters = m_ClTunedParameters.get(); - } - m_Runtime = armnn::IRuntime::Create(options); - } - catch (const armnn::ClRuntimeUnavailableException& error) - { - ALOGE("ArmnnDriver: Failed to setup CL runtime: %s. 
Device will be unavailable.", error.what()); - } -} - -Return ArmnnDriver::getCapabilities(V1_0::IDevice::getCapabilities_cb cb) -{ - ALOGV("ArmnnDriver::getCapabilities()"); - - V1_0::Capabilities capabilities; - if (m_Runtime) - { - capabilities.float32Performance.execTime = - ParseSystemProperty(g_Float32PerformanceExecTimeName, 1.0f); - - capabilities.float32Performance.powerUsage = - ParseSystemProperty(g_Float32PerformancePowerUsageName, 1.0f); - - capabilities.quantized8Performance.execTime = - ParseSystemProperty(g_Quantized8PerformanceExecTimeName, 1.0f); - - capabilities.quantized8Performance.powerUsage = - ParseSystemProperty(g_Quantized8PerformancePowerUsageName, 1.0f); - - cb(ErrorStatus::NONE, capabilities); - } - else - { - capabilities.float32Performance.execTime = 0; - capabilities.float32Performance.powerUsage = 0; - capabilities.quantized8Performance.execTime = 0; - capabilities.quantized8Performance.powerUsage = 0; - - cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities); - } - - return Void(); -} - -Return ArmnnDriver::getSupportedOperations(const V1_0::Model& model, V1_0::IDevice::getSupportedOperations_cb cb) -{ - ALOGV("ArmnnDriver::getSupportedOperations()"); - - std::vector result; - - if (!m_Runtime) - { - cb(ErrorStatus::DEVICE_UNAVAILABLE, result); - return Void(); - } - - // Run general model validation, if this doesn't pass we shouldn't analyse the model anyway - if (!android::nn::validateModel(model)) - { - cb(ErrorStatus::INVALID_ARGUMENT, result); - return Void(); - } - - // Attempt to convert the model to an ArmNN input network (INetwork). - ModelToINetworkConverter modelConverter(m_Runtime->GetDeviceSpec().DefaultComputeDevice, model, - m_Options.GetForcedUnsupportedOperations()); - - if (modelConverter.GetConversionResult() != ConversionResult::Success - && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature) - { - cb(ErrorStatus::GENERAL_FAILURE, result); - return Void(); - } - - // Check each operation if it was converted successfully and copy the flags - // into the result (vector) that we need to return to Android - result.reserve(model.operations.size()); - for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); operationIdx++) - { - bool operationSupported = modelConverter.IsOperationSupported(operationIdx); - result.push_back(operationSupported); - } - - cb(ErrorStatus::NONE, result); - return Void(); -} - -namespace -{ - -void NotifyCallbackAndCheck(const sp& callback, ErrorStatus errorStatus, - const ::android::sp& preparedModelPtr) -{ - Return returned = callback->notify(errorStatus, preparedModelPtr); - // This check is required, if the callback fails and it isn't checked it will bring down the service - if (!returned.isOk()) - { - ALOGE("ArmnnDriver::prepareModel: hidl callback failed to return properly: %s ", - returned.description().c_str()); - } -} - -Return FailPrepareModel(ErrorStatus error, - const std::string& message, - const sp& callback) -{ - ALOGW("ArmnnDriver::prepareModel: %s", message.c_str()); - NotifyCallbackAndCheck(callback, error, nullptr); - return error; -} - -} - -Return ArmnnDriver::prepareModel(const V1_0::Model& model, - const sp& cb) -{ - ALOGV("ArmnnDriver::prepareModel()"); - - if (cb.get() == nullptr) - { - ALOGW("ArmnnDriver::prepareModel: Invalid callback passed to prepareModel"); - return ErrorStatus::INVALID_ARGUMENT; - } - - if (!m_Runtime) - { - return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, "ArmnnDriver::prepareModel: Device unavailable", cb); - } - - if 
(!android::nn::validateModel(model)) - { - return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT, - "ArmnnDriver::prepareModel: Invalid model passed as input", cb); - } - - if (m_Options.UseAndroidNnCpuExecutor()) - { - sp preparedModel = new AndroidNnCpuExecutorPreparedModel(model, - m_Options.GetRequestInputsAndOutputsDumpDir()); - if (preparedModel->Initialize()) - { - NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel); - return ErrorStatus::NONE; - } - else - { - NotifyCallbackAndCheck(cb, ErrorStatus::INVALID_ARGUMENT, preparedModel); - return ErrorStatus::INVALID_ARGUMENT; - } - } - - // Deliberately ignore any unsupported operations requested by the options - - // at this point we're being asked to prepare a model that we've already declared support for - // and the operation indices may be different to those in getSupportedOperations anyway. - std::set unsupportedOperations; - ModelToINetworkConverter modelConverter(m_Runtime->GetDeviceSpec().DefaultComputeDevice, model, - unsupportedOperations); - - if (modelConverter.GetConversionResult() != ConversionResult::Success) - { - FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb); - return ErrorStatus::NONE; - } - - // optimize the network - armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); - try - { - optNet = armnn::Optimize(*modelConverter.GetINetwork(), m_Runtime->GetDeviceSpec()); - } - catch (armnn::Exception& e) - { - std::stringstream message; - message << "armnn::Exception ("<LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success) - { - return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, - "ArmnnDriver::prepareModel: Network could not be loaded", cb); - } - } - catch (armnn::Exception& e) - { - std::stringstream message; - message << "armnn::Exception (" << e.what()<< ") caught from LoadNetwork."; - FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb); - return ErrorStatus::NONE; - } - - std::unique_ptr preparedModel(new ArmnnPreparedModel( - netId, - m_Runtime.get(), - model, - m_Options.GetRequestInputsAndOutputsDumpDir() - )); - - // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if - // this is enabled) before the first 'real' inference which removes the overhead of the first inference. - preparedModel->ExecuteWithDummyInputs(); - - if (m_ClTunedParameters && - m_Options.GetClTunedParametersMode() == armnn::IClTunedParameters::Mode::UpdateTunedParameters) - { - // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file. - try - { - m_ClTunedParameters->Save(m_Options.GetClTunedParametersFile().c_str()); - } - catch (const armnn::Exception& error) - { - ALOGE("ArmnnDriver: Failed to save CL tuned parameters file '%s': %s", - m_Options.GetClTunedParametersFile().c_str(), error.what()); - } - } - - NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release()); - - return ErrorStatus::NONE; -} - -Return ArmnnDriver::getStatus() -{ - ALOGV("ArmnnDriver::getStatus()"); - return DeviceStatus::AVAILABLE; -} - -} diff --git a/ArmnnDriver.hpp b/ArmnnDriver.hpp index e8dc3bfb..32a10a47 100644 --- a/ArmnnDriver.hpp +++ b/ArmnnDriver.hpp @@ -5,61 +5,48 @@ #pragma once -#include "HalInterfaces.h" -#include "NeuralNetworks.h" -#include +#include -#include -#include -#include +#include -// For Android O, explicitly declare the V1_0 HAL namespace to shorten type declarations, -// as the namespace is not defined in HalInterfaces.h. 
-namespace V1_0 = ::android::hardware::neuralnetworks::V1_0; +#if defined(ARMNN_ANDROID_NN_V1_1) // Using ::android::hardware::neuralnetworks::V1_1. + +#include "1.1/ArmnnDriver.hpp" namespace armnn_driver { -class DriverOptions +class ArmnnDriver : public V1_1::ArmnnDriver { public: - DriverOptions(armnn::Compute computeDevice); - DriverOptions(int argc, char** argv); - DriverOptions(DriverOptions&& other) = default; + ArmnnDriver(DriverOptions options) + : V1_1::ArmnnDriver(std::move(options)) + { + ALOGV("ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} +}; - armnn::Compute GetComputeDevice() const { return m_ComputeDevice; } - bool IsVerboseLoggingEnabled() const { return m_VerboseLogging; } - const std::string& GetRequestInputsAndOutputsDumpDir() const { return m_RequestInputsAndOutputsDumpDir; } - bool UseAndroidNnCpuExecutor() const { return m_UseAndroidNnCpuExecutor; } - const std::set& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; } - const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; } - armnn::IClTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; } +} // namespace armnn_driver -private: - armnn::Compute m_ComputeDevice; - bool m_VerboseLogging; - bool m_UseAndroidNnCpuExecutor; - std::string m_RequestInputsAndOutputsDumpDir; - std::set m_ForcedUnsupportedOperations; - std::string m_ClTunedParametersFile; - armnn::IClTunedParameters::Mode m_ClTunedParametersMode; -}; +#else // Fallback to ::android::hardware::neuralnetworks::V1_0. -class ArmnnDriver : public V1_0::IDevice { -public: - ArmnnDriver(DriverOptions options); - virtual ~ArmnnDriver() {} - virtual Return getCapabilities(V1_0::IDevice::getCapabilities_cb _hidl_cb) override; - virtual Return getSupportedOperations(const V1_0::Model &model, - V1_0::IDevice::getSupportedOperations_cb _hidl_cb) override; - virtual Return prepareModel(const V1_0::Model &model, - const android::sp& callback); - virtual Return getStatus() override; +#include "1.0/ArmnnDriver.hpp" + +namespace armnn_driver +{ -private: - armnn::IRuntimePtr m_Runtime; - armnn::IClTunedParametersPtr m_ClTunedParameters; - DriverOptions m_Options; +class ArmnnDriver : public V1_0::ArmnnDriver +{ +public: + ArmnnDriver(DriverOptions options) + : V1_0::ArmnnDriver(std::move(options)) + { + ALOGV("ArmnnDriver::ArmnnDriver()"); + } + ~ArmnnDriver() {} }; -} +} // namespace armnn_driver + +#endif diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp index 3aad955b..d338fdc8 100644 --- a/ArmnnPreparedModel.cpp +++ b/ArmnnPreparedModel.cpp @@ -83,6 +83,8 @@ inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t ind } +using namespace android::hardware; + namespace armnn_driver { @@ -106,21 +108,31 @@ void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix, } ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId, - armnn::IRuntime* runtime, - const V1_0::Model& model, - const std::string& requestInputsAndOutputsDumpDir) -: m_NetworkId(networkId) -, m_Runtime(runtime) -, m_Model(model) -, m_RequestCount(0) -, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + armnn::IRuntime* runtime, + const neuralnetworks::V1_0::Model& model, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled) + : m_NetworkId(networkId) + , m_Runtime(runtime) + , m_Model(model) + , m_RequestCount(0) + , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) + , 
m_GpuProfilingEnabled(gpuProfilingEnabled) { + // Enable profiling if required. + m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); } ArmnnPreparedModel::~ArmnnPreparedModel() { - //unload the network associated with this model + // Get a hold of the profiler used by this model. + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + // Unload the network associated with this model. m_Runtime->UnloadNetwork(m_NetworkId); + + // Dump the profiling info to a file if required. + DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get()); } Return ArmnnPreparedModel::execute(const Request& request, @@ -275,85 +287,4 @@ void ArmnnPreparedModel::ExecuteWithDummyInputs() } } -AndroidNnCpuExecutorPreparedModel::AndroidNnCpuExecutorPreparedModel(const V1_0::Model& model, - const std::string& requestInputsAndOutputsDumpDir) -: m_Model(model) -, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) -, m_RequestCount(0) -{ -} - -bool AndroidNnCpuExecutorPreparedModel::Initialize() -{ - return setRunTimePoolInfosFromHidlMemories(&m_ModelPoolInfos, m_Model.pools); -} - -Return AndroidNnCpuExecutorPreparedModel::execute(const Request& request, - const ::android::sp& callback) -{ - m_RequestCount++; - std::vector requestPoolInfos; - - if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) - { - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "AndroidNnCpuExecutorPreparedModel::execute"); - return ErrorStatus::GENERAL_FAILURE; - } - - if (!m_RequestInputsAndOutputsDumpDir.empty()) - { - ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast(callback.get())); - } - - DumpTensorsIfRequired( - "Input", - m_Model.inputIndexes, - request.inputs, - requestPoolInfos); - - android::nn::CpuExecutor executor; - const int n = executor.run(m_Model, request, m_ModelPoolInfos, requestPoolInfos); - ErrorStatus executionStatus = - n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; - - DumpTensorsIfRequired( - "Output", - m_Model.outputIndexes, - request.outputs, - requestPoolInfos); - - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "AndroidNnCpuExecutorPreparedModel::execute"); - return executionStatus; -} - -void AndroidNnCpuExecutorPreparedModel::DumpTensorsIfRequired( - char const* tensorNamePrefix, - const hidl_vec& operandIndices, - const hidl_vec& requestArgs, - const std::vector& requestPoolInfos) -{ - if (m_RequestInputsAndOutputsDumpDir.empty()) - { - return; - } - - for (std::size_t i = 0; i < requestArgs.size(); ++i) - { - const Operand& operand = m_Model.operands[operandIndices[i]]; - const armnn::TensorInfo tensorInfo = GetTensorInfoForOperand(operand); - const armnn::Tensor tensor = GetTensorForRequestArgument(requestArgs[i], tensorInfo, requestPoolInfos); - const std::string tensorName = BuildTensorName(tensorNamePrefix, i); - if (tensor.GetMemoryArea() != nullptr) - { - std::string requestName = boost::str(boost::format("%1%_%2%.dump") % this % m_RequestCount); - DumpTensor(m_RequestInputsAndOutputsDumpDir, requestName, tensorName, tensor); - } - else - { - ALOGE("Cannot dump tensor %s. 
An error occurred converting the associated request argument to a tensor.", - tensorName.c_str()); - } - } -} - -} +} // namespace armnn_driver diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp index f7644b95..a700e54d 100644 --- a/ArmnnPreparedModel.hpp +++ b/ArmnnPreparedModel.hpp @@ -7,12 +7,11 @@ #include "RequestThread.hpp" -#include "HalInterfaces.h" -#include "NeuralNetworks.h" -#include - #include "ArmnnDriver.hpp" +#include +#include + #include #include @@ -24,8 +23,9 @@ class ArmnnPreparedModel : public IPreparedModel public: ArmnnPreparedModel(armnn::NetworkId networkId, armnn::IRuntime* runtime, - const V1_0::Model& model, - const std::string& requestInputsAndOutputsDumpDir); + const ::android::hardware::neuralnetworks::V1_0::Model& model, + const std::string& requestInputsAndOutputsDumpDir, + const bool gpuProfilingEnabled); virtual ~ArmnnPreparedModel(); @@ -46,40 +46,15 @@ private: template void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); - armnn::NetworkId m_NetworkId; - armnn::IRuntime* m_Runtime; - V1_0::Model m_Model; + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + ::android::hardware::neuralnetworks::V1_0::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread m_RequestThread; - uint32_t m_RequestCount; - const std::string& m_RequestInputsAndOutputsDumpDir; -}; - -class AndroidNnCpuExecutorPreparedModel : public IPreparedModel -{ -public: - - AndroidNnCpuExecutorPreparedModel(const V1_0::Model& model, const std::string& requestInputsAndOutputsDumpDir); - virtual ~AndroidNnCpuExecutorPreparedModel() { } - - bool Initialize(); - - virtual Return execute(const Request& request, - const ::android::sp& callback) override; - -private: - - void DumpTensorsIfRequired( - char const* tensorNamePrefix, - const hidl_vec& operandIndices, - const hidl_vec& requestArgs, - const std::vector& requestPoolInfos); - - V1_0::Model m_Model; - std::vector m_ModelPoolInfos; - const std::string& m_RequestInputsAndOutputsDumpDir; - uint32_t m_RequestCount; + static RequestThread m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; }; } diff --git a/DriverOptions.cpp b/DriverOptions.cpp new file mode 100644 index 00000000..3d397e37 --- /dev/null +++ b/DriverOptions.cpp @@ -0,0 +1,159 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
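// A sketch of the command line the parser below accepts (flag values are illustrative
// only; the service name comes from the .rc file added by this patch):
//
//     android.hardware.neuralnetworks@1.1-service-armnn -c GpuAcc -v -p \
//         -t /data/armnn/cl_tuned_params -m UpdateTunedParameters
//
// This selects the GpuAcc backend, turns on verbose logging and GPU profiling, and
// re-tunes the CL kernel parameters, saving them back to the given file once the
// first prepared model has run.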
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "DriverOptions.hpp"
+#include "Utils.hpp"
+
+#include
+#include "SystemPropertiesUtils.hpp"
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+using namespace android;
+using namespace std;
+
+namespace armnn_driver
+{
+
+DriverOptions::DriverOptions(armnn::Compute computeDevice)
+    : m_ComputeDevice(computeDevice)
+    , m_VerboseLogging(false)
+    , m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+    , m_EnableGpuProfiling(false)
+    , m_fp16Enabled(false)
+{
+}
+
+DriverOptions::DriverOptions(int argc, char** argv)
+    : m_ComputeDevice(armnn::Compute::GpuAcc)
+    , m_VerboseLogging(false)
+    , m_ClTunedParametersMode(armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+    , m_EnableGpuProfiling(false)
+    , m_fp16Enabled(false)
+{
+    namespace po = boost::program_options;
+
+    std::string computeDeviceAsString;
+    std::string unsupportedOperationsAsString;
+    std::string clTunedParametersModeAsString;
+
+    po::options_description optionsDesc("Options");
+    optionsDesc.add_options()
+        ("compute,c",
+         po::value<std::string>(&computeDeviceAsString)->default_value("GpuAcc"),
+         "Which device to run layers on by default. Possible values are: CpuRef, CpuAcc, GpuAcc")
+
+        ("verbose-logging,v",
+         po::bool_switch(&m_VerboseLogging),
+         "Turns verbose logging on")
+
+        ("request-inputs-and-outputs-dump-dir,d",
+         po::value<std::string>(&m_RequestInputsAndOutputsDumpDir)->default_value(""),
+         "If non-empty, the directory where request inputs and outputs should be dumped")
+
+        ("unsupported-operations,u",
+         po::value<std::string>(&unsupportedOperationsAsString)->default_value(""),
+         "If non-empty, a comma-separated list of operation indices which the driver will forcibly "
+         "consider unsupported")
+
+        ("cl-tuned-parameters-file,t",
+         po::value<std::string>(&m_ClTunedParametersFile)->default_value(""),
+         "If non-empty, the given file will be used to load/save CL tuned parameters. "
+         "See also --cl-tuned-parameters-mode")
+
+        ("cl-tuned-parameters-mode,m",
+         po::value<std::string>(&clTunedParametersModeAsString)->default_value("UseTunedParameters"),
+         "If 'UseTunedParameters' (the default), will read CL tuned parameters from the file specified by "
+         "--cl-tuned-parameters-file. "
+         "If 'UpdateTunedParameters', will also find the optimum parameters when preparing new networks and update "
+         "the file accordingly.")
+
+        ("gpu-profiling,p",
+         po::bool_switch(&m_EnableGpuProfiling),
+         "Turns GPU profiling on")
+
+        ("fp16-enabled,f",
+         po::bool_switch(&m_fp16Enabled),
+         "Enables support for relaxed computation from Float32 to Float16");
+
+    po::variables_map variablesMap;
+    try
+    {
+        po::store(po::parse_command_line(argc, argv, optionsDesc), variablesMap);
+        po::notify(variablesMap);
+    }
+    catch (const po::error& e)
+    {
+        ALOGW("An error occurred attempting to parse program options: %s", e.what());
+    }
+
+    if (computeDeviceAsString == "CpuRef")
+    {
+        m_ComputeDevice = armnn::Compute::CpuRef;
+    }
+    else if (computeDeviceAsString == "GpuAcc")
+    {
+        m_ComputeDevice = armnn::Compute::GpuAcc;
+    }
+    else if (computeDeviceAsString == "CpuAcc")
+    {
+        m_ComputeDevice = armnn::Compute::CpuAcc;
+    }
+    else
+    {
+        ALOGW("Requested unknown compute device %s. 
Defaulting to compute id %s", + computeDeviceAsString.c_str(), GetComputeDeviceAsCString(m_ComputeDevice)); + } + + if (!unsupportedOperationsAsString.empty()) + { + std::istringstream argStream(unsupportedOperationsAsString); + + std::string s; + while (!argStream.eof()) + { + std::getline(argStream, s, ','); + try + { + unsigned int operationIdx = std::stoi(s); + m_ForcedUnsupportedOperations.insert(operationIdx); + } + catch (const std::invalid_argument&) + { + ALOGW("Ignoring invalid integer argument in -u/--unsupported-operations value: %s", s.c_str()); + } + } + } + + if (!m_ClTunedParametersFile.empty()) + { + // The mode is only relevant if the file path has been provided + if (clTunedParametersModeAsString == "UseTunedParameters") + { + m_ClTunedParametersMode = armnn::IGpuAccTunedParameters::Mode::UseTunedParameters; + } + else if (clTunedParametersModeAsString == "UpdateTunedParameters") + { + m_ClTunedParametersMode = armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters; + } + else + { + ALOGW("Requested unknown cl-tuned-parameters-mode '%s'. Defaulting to UseTunedParameters", + clTunedParametersModeAsString.c_str()); + } + } +} + +} // namespace armnn_driver diff --git a/DriverOptions.hpp b/DriverOptions.hpp new file mode 100644 index 00000000..4a378b26 --- /dev/null +++ b/DriverOptions.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +#include +#include + +namespace armnn_driver +{ + +class DriverOptions +{ +public: + DriverOptions(armnn::Compute computeDevice); + DriverOptions(int argc, char** argv); + DriverOptions(DriverOptions&& other) = default; + + armnn::Compute GetComputeDevice() const { return m_ComputeDevice; } + bool IsVerboseLoggingEnabled() const { return m_VerboseLogging; } + const std::string& GetRequestInputsAndOutputsDumpDir() const { return m_RequestInputsAndOutputsDumpDir; } + const std::set& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; } + const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; } + armnn::IGpuAccTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; } + bool IsGpuProfilingEnabled() const { return m_EnableGpuProfiling; } + bool GetFp16Enabled() const { return m_fp16Enabled; } + +private: + armnn::Compute m_ComputeDevice; + bool m_VerboseLogging; + std::string m_RequestInputsAndOutputsDumpDir; + std::set m_ForcedUnsupportedOperations; + std::string m_ClTunedParametersFile; + armnn::IGpuAccTunedParameters::Mode m_ClTunedParametersMode; + bool m_EnableGpuProfiling; + bool m_fp16Enabled; +}; + +} // namespace armnn_driver diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp index fe4e8ac1..3da56ef8 100644 --- a/ModelToINetworkConverter.cpp +++ b/ModelToINetworkConverter.cpp @@ -6,7 +6,7 @@ #define LOG_TAG "ArmnnDriver" #include "ModelToINetworkConverter.hpp" -#include "OperationsUtils.h" +#include #include #include @@ -19,6 +19,8 @@ #include #include +using namespace android::hardware; + namespace armnn_driver { class LayerInputHandle @@ -105,45 +107,66 @@ inline bool IsOperandTypeSupportedForTensors(OperandType type) type == OperandType::TENSOR_INT32; } -void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail, - android::nn::PaddingScheme scheme) -{ - int32_t padHead; - int32_t padTail; - calculateExplicitPadding(input, stride, kernel, 
scheme, &padHead, &padTail);
-    outPadHead = boost::numeric_cast<uint32_t>(padHead);
-    outPadTail = boost::numeric_cast<uint32_t>(padTail);
-}
-
-bool ValidateBroadcast(const V1_0::Model& model, const V1_0::Operation& operation, uint32_t numInputs)
+void BroadcastTensor(LayerInputHandle& input0, LayerInputHandle& input1, armnn::IConnectableLayer* startLayer,
+                     armnn::INetwork& network)
 {
-    assert(operation.inputs.size() > 0); // This should have been validated by the caller
-    // validateModel() has been called already so we know the operation.inputs indexes are valid within model.operands.
-    const Operand& firstInput = model.operands[operation.inputs[0]];
+    BOOST_ASSERT(startLayer != nullptr);
+    const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo();
+    const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo();
 
-    // We don't support broadcasting yet - we require all input operands to have the same shape
-    for (uint32_t i = 1; i < numInputs; ++i)
+    if (inputTensorInfo0.GetNumDimensions() != inputTensorInfo1.GetNumDimensions())
     {
-        const Operand& otherInput = model.operands[operation.inputs[i]];
+        // If the number of dimensions does not match then we need to add degenerate dimensions
+        // to the "smaller" tensor using a reshape:
+        //      Small  Big
+        //        |     |
+        //     Reshape  |
+        //         \   /
+        //          Add
+        bool input0IsBigger = inputTensorInfo0.GetNumDimensions() > inputTensorInfo1.GetNumDimensions();
 
-        if (firstInput.dimensions.size() != otherInput.dimensions.size())
-        {
-            return Fail("%s: Broadcasting not supported (Input 0 dims: %i Input %i dims: %i)",
-                        __func__, firstInput.dimensions.size(), i, otherInput.dimensions.size());
-        }
+        LayerInputHandle& smallTensorHandle = input0IsBigger ? input1 : input0;
+        const armnn::TensorInfo& smallTensorDims = smallTensorHandle.GetTensorInfo();
+
+        LayerInputHandle& bigTensorHandle = input0IsBigger ? input0 : input1;
+        const armnn::TensorInfo& bigTensorDims = bigTensorHandle.GetTensorInfo();
 
-        for (unsigned int d = 0; d < firstInput.dimensions.size(); ++d)
+        const unsigned int bigTensorDimsNumber = bigTensorDims.GetNumDimensions();
+        std::vector<unsigned int> reshapedDims(bigTensorDimsNumber, 1);
+        unsigned int sizeDifference = bigTensorDimsNumber - smallTensorDims.GetNumDimensions();
+        for (unsigned i = sizeDifference; i < bigTensorDimsNumber; ++i)
         {
-            if (firstInput.dimensions[d] != otherInput.dimensions[d])
-            {
-                return Fail("%s: Broadcasting not supported (Dimension %i size mismatch. 
" - "Input 0: %i Input %i: %i)", - __func__, d, firstInput.dimensions[d], i, otherInput.dimensions[d]); - } + reshapedDims[i] = smallTensorDims.GetShape()[i-sizeDifference]; } + armnn::TensorInfo reshapedInfo = smallTensorDims; + reshapedInfo.SetShape(armnn::TensorShape{ static_cast(reshapedDims.size()), + reshapedDims.data() }); + + armnn::ReshapeDescriptor reshapeDesc; + reshapeDesc.m_TargetShape = reshapedInfo.GetShape(); + armnn::IConnectableLayer* const reshapeLayer = network.AddReshapeLayer(reshapeDesc); + smallTensorHandle.Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo); + + // Connect the outputs from new reshape and original input layer + reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0)); + bigTensorHandle.Connect(startLayer->GetInputSlot(1)); } + else + { + input0.Connect(startLayer->GetInputSlot(0)); + input1.Connect(startLayer->GetInputSlot(1)); + } +} - return true; +void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail, + android::nn::PaddingScheme scheme) +{ + int32_t padHead; + int32_t padTail; + calculateExplicitPadding(input, stride, kernel, scheme, &padHead, &padTail); + outPadHead = boost::numeric_cast(padHead); + outPadTail = boost::numeric_cast(padTail); } Shape GetOperandShape(const Operand& operand) @@ -175,11 +198,17 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo, } } -const armnn::PermutationVector IdentityPermutation({ 0U, 1U, 2U, 3U }); +// 4D Tensor Permutations +const armnn::PermutationVector IdentityPermutation4D({ 0U, 1U, 2U, 3U }); const armnn::PermutationVector NHWCToArmNN({ 0U, 2U, 3U, 1U }); const armnn::PermutationVector ArmNNToNHWC({ 0U, 3U, 1U, 2U }); const armnn::PermutationVector SwapDim1And2({ 0U, 2U, 1U, 3U }); +// 3D Permutation Vectors +const armnn::PermutationVector IdentityPermutation3D({ 0U, 1U, 2U }); +const armnn::PermutationVector RotateTensorLeft({ 2U, 0U, 1U }); +const armnn::PermutationVector RotateTensorRight({ 1U, 2U, 0U }); + template armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input, const armnn::PermutationVector& mappings) @@ -189,7 +218,7 @@ armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input assert(layer != nullptr); - // Connect intput to swizzle layer + // Connect input to swizzle layer input.Connect(layer->GetInputSlot(0)); // Setup swizzled output @@ -199,22 +228,32 @@ armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input return *layer; } -armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerInputHandle& input, - armnn::IConnectableLayer& firstLayer, - armnn::IConnectableLayer& lastLayer) +void SwizzleIn(armnn::INetwork& network, LayerInputHandle& input, armnn::IConnectableLayer& layer, unsigned int index) { // Add swizzle layer armnn::IConnectableLayer& swizzleLayer = AddPermuteLayer(network, input, NHWCToArmNN); - // Connect swizzled input to layer - swizzleLayer.GetOutputSlot(0).Connect(firstLayer.GetInputSlot(0)); + swizzleLayer.GetOutputSlot(0).Connect(layer.GetInputSlot(index)); +} +armnn::IConnectableLayer& DeswizzleOut(armnn::INetwork& network, armnn::IConnectableLayer& layer, unsigned int index) +{ // Add deswizzle layer - armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(network, lastLayer.GetOutputSlot(0), ArmNNToNHWC); - + armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(network, layer.GetOutputSlot(index), ArmNNToNHWC); return 
deswizzleLayer; } +// only suitable for input/output slot index 0, for other slots, use SwizzleIn and DeswizzleOut directly +armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, + LayerInputHandle& input, + armnn::IConnectableLayer& firstLayer, + armnn::IConnectableLayer& lastLayer) +{ + SwizzleIn(network, input, firstLayer, 0); + return DeswizzleOut(network, lastLayer, 0); +} + +// only suitable for input/output slot index 0, for other slots, use SwizzleIn and DeswizzleOut directly armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerInputHandle& input, armnn::IConnectableLayer& layer) { @@ -264,12 +303,34 @@ bool ValidateConcatOutputShape(const std::vector & inputShap return true; } +bool RequiresReshape(armnn::TensorShape & inputShape) +{ + return inputShape.GetNumDimensions() < 3; +} + +template +armnn::IConnectableLayer& AddReshapeLayer(armnn::INetwork& network, OSlot& inputLayer, + armnn::TensorInfo reshapeInfo) +{ + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = reshapeInfo.GetShape(); + + armnn::IConnectableLayer* reshapeLayer = network.AddReshapeLayer(reshapeDescriptor); + assert(reshapeLayer != nullptr); + + // Attach the input layer to the reshape layer + inputLayer.Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapeInfo); + + return *reshapeLayer; +} + void SwizzleInputs(armnn::INetwork& network, std::vector& inputs, std::vector& inputShapes, const armnn::PermutationVector& mapping) { - if (!mapping.IsEqual(IdentityPermutation)) + if (!mapping.IsEqual(IdentityPermutation4D)) { size_t nInputs = inputs.size(); for (size_t i=0; i & permutationPair) +{ + assert(numberOfDimensions >= 3); + + // ArmNN uses Compute Library subtensors to perform concatenation + // This only works when concatenating along dimension 0 or 1 for a 4-D tensor, + // or along dimension 0 for a 3-D tensor. + if (numberOfDimensions == 4) + { + if (concatDimension == 3) + { + concatDimension = 1; + permutationPair = std::make_pair(NHWCToArmNN, ArmNNToNHWC); + } + else if (concatDimension == 2) + { + concatDimension = 1; + permutationPair = std::make_pair(SwapDim1And2, SwapDim1And2); + } + else + { + permutationPair = std::make_pair(IdentityPermutation4D, IdentityPermutation4D); + } + + } + else if (numberOfDimensions == 3) + { + if (concatDimension == 2) + { + concatDimension = 0; + permutationPair = std::make_pair(RotateTensorRight, RotateTensorLeft); + } + else if (concatDimension == 1) + { + concatDimension = 0; + permutationPair = std::make_pair(RotateTensorLeft, RotateTensorRight); + } + else + { + permutationPair = std::make_pair(IdentityPermutation3D, IdentityPermutation3D); + } + } +} + + } // namespace namespace armnn_driver @@ -294,7 +402,8 @@ class ConstTensorPin { public: // Creates an invalid tensor pin (can be used to signal errors) - ConstTensorPin() {} + // The optional flag can be set to indicate the tensor values were missing, but it was otherwise valid + ConstTensorPin(bool optional = false) : m_Optional(optional) {} // @param tensorInfo TensorInfo associated with the tensor. // @param valueStart Start address of tensor data. 
Belongs to one of the memory pools associated with @@ -324,7 +433,17 @@ public: ConstTensorPin(ConstTensorPin&& other) = default; bool IsValid() const { return m_ConstTensor.GetMemoryArea() != nullptr; } + bool IsOptional() const { return m_Optional; } const armnn::ConstTensor& GetConstTensor() const { return m_ConstTensor; } + const armnn::ConstTensor* GetConstTensorPtr() const + { + if (IsValid() && m_ConstTensor.GetNumElements() > 0) + { + return &m_ConstTensor; + } + // tensor is either invalid, or has no elements (indicating an optional tensor that was not provided) + return nullptr; + } private: armnn::ConstTensor m_ConstTensor; @@ -332,9 +451,12 @@ private: // swizzling. Otherwise, @ref m_ConstTensor will reference memory from one of // the pools associated with the model being converted. std::vector m_SwizzledTensorData; + // optional flag to indicate that an invalid tensor pin is not an error, but the optional values were not given + bool m_Optional; }; -ModelToINetworkConverter::ModelToINetworkConverter(armnn::Compute compute, const V1_0::Model& model, +ModelToINetworkConverter::ModelToINetworkConverter(armnn::Compute compute, + const neuralnetworks::V1_0::Model& model, const std::set& forcedUnsupportedOperations) : m_Compute(compute) , m_Model(model) @@ -471,37 +593,59 @@ void ModelToINetworkConverter::Convert() } } -bool ModelToINetworkConverter::ConvertOperation(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertOperation(const neuralnetworks::V1_0::Operation& operation) { switch (operation.type) { - case V1_0::OperationType::ADD: return ConvertAdd(operation); - case V1_0::OperationType::AVERAGE_POOL_2D: return ConvertAveragePool2d(operation); - case V1_0::OperationType::CONCATENATION: return ConvertConcatenation(operation); - case V1_0::OperationType::CONV_2D: return ConvertConv2d(operation); - case V1_0::OperationType::DEPTHWISE_CONV_2D: return ConvertDepthwiseConv2d(operation); - case V1_0::OperationType::FLOOR: return ConvertFloor(operation); - case V1_0::OperationType::FULLY_CONNECTED: return ConvertFullyConnected(operation); - case V1_0::OperationType::LOCAL_RESPONSE_NORMALIZATION: return ConvertLocalResponseNormalization(operation); - case V1_0::OperationType::LOGISTIC: return ConvertLogistic(operation); - case V1_0::OperationType::L2_NORMALIZATION: return ConvertL2Normalization(operation); - case V1_0::OperationType::L2_POOL_2D: return ConvertL2Pool2d(operation); - case V1_0::OperationType::MAX_POOL_2D: return ConvertMaxPool2d(operation); - case V1_0::OperationType::MUL: return ConvertMul(operation); - case V1_0::OperationType::RELU: return ConvertReLu(operation); - case V1_0::OperationType::RELU1: return ConvertReLu1(operation); - case V1_0::OperationType::RELU6: return ConvertReLu6(operation); - case V1_0::OperationType::SOFTMAX: return ConvertSoftmax(operation); - case V1_0::OperationType::TANH: return ConvertTanH(operation); - case V1_0::OperationType::RESHAPE: return ConvertReshape(operation); - case V1_0::OperationType::RESIZE_BILINEAR: return ConvertResizeBilinear(operation); - default: return Fail("%s: Operation type %s not supported in ArmnnDriver", - __func__, toString(operation.type).c_str()); + case neuralnetworks::V1_0::OperationType::ADD: + return ConvertAdd(operation); + case neuralnetworks::V1_0::OperationType::AVERAGE_POOL_2D: + return ConvertAveragePool2d(operation); + case neuralnetworks::V1_0::OperationType::CONCATENATION: + return ConvertConcatenation(operation); + case neuralnetworks::V1_0::OperationType::CONV_2D: + return 
ConvertConv2d(operation); + case neuralnetworks::V1_0::OperationType::DEPTHWISE_CONV_2D: + return ConvertDepthwiseConv2d(operation); + case neuralnetworks::V1_0::OperationType::FLOOR: + return ConvertFloor(operation); + case neuralnetworks::V1_0::OperationType::FULLY_CONNECTED: + return ConvertFullyConnected(operation); + case neuralnetworks::V1_0::OperationType::LOCAL_RESPONSE_NORMALIZATION: + return ConvertLocalResponseNormalization(operation); + case neuralnetworks::V1_0::OperationType::LOGISTIC: + return ConvertLogistic(operation); + case neuralnetworks::V1_0::OperationType::LSTM: + return ConvertLstm(operation); + case neuralnetworks::V1_0::OperationType::L2_NORMALIZATION: + return ConvertL2Normalization(operation); + case neuralnetworks::V1_0::OperationType::L2_POOL_2D: + return ConvertL2Pool2d(operation); + case neuralnetworks::V1_0::OperationType::MAX_POOL_2D: + return ConvertMaxPool2d(operation); + case neuralnetworks::V1_0::OperationType::MUL: + return ConvertMul(operation); + case neuralnetworks::V1_0::OperationType::RELU: + return ConvertReLu(operation); + case neuralnetworks::V1_0::OperationType::RELU1: + return ConvertReLu1(operation); + case neuralnetworks::V1_0::OperationType::RELU6: + return ConvertReLu6(operation); + case neuralnetworks::V1_0::OperationType::SOFTMAX: + return ConvertSoftmax(operation); + case neuralnetworks::V1_0::OperationType::TANH: + return ConvertTanH(operation); + case neuralnetworks::V1_0::OperationType::RESHAPE: + return ConvertReshape(operation); + case neuralnetworks::V1_0::OperationType::RESIZE_BILINEAR: + return ConvertResizeBilinear(operation); + default: + return Fail("%s: Operation type %s not supported in ArmnnDriver", + __func__, toString(operation.type).c_str()); } } - -bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertAdd(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0); LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1); @@ -511,8 +655,10 @@ bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) return Fail("%s: Operation has invalid inputs", __func__); } + // The FuseActivation parameter is always the input index 2 + // and it should be optional ActivationFn activationFunction; - if (!GetInputActivationFunction(operation, 2, activationFunction)) + if (!GetOptionalInputActivation(operation, 2, activationFunction)) { return Fail("%s: Operation has invalid inputs", __func__); } @@ -543,49 +689,7 @@ bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) if (endLayer != nullptr) { - // If the number of dimensions do not match then we need to add degenerate dimensions - // to the "smaller" tensor using a reshape: - // Small Big - // | | - // Reshape | - // \ / - // Add - if (inputTensorInfo0.GetNumDimensions() != inputTensorInfo1.GetNumDimensions()) - { - bool input0IsBigger = inputTensorInfo0.GetNumDimensions() > inputTensorInfo1.GetNumDimensions(); - - LayerInputHandle& smallTensorHandle = input0IsBigger ? input1 : input0; - const armnn::TensorInfo& smallTensorDims = smallTensorHandle.GetTensorInfo(); - - LayerInputHandle& bigTensorHandle = input0IsBigger ? 
input0 : input1; - const armnn::TensorInfo& bigTensorDims = bigTensorHandle.GetTensorInfo(); - - std::vector reshapedDims(bigTensorDims.GetNumDimensions(), 1); - unsigned int sizeDifference = bigTensorDims.GetNumDimensions() - smallTensorDims.GetNumDimensions(); - for (unsigned i = sizeDifference; i < bigTensorDims.GetNumDimensions(); ++i) - { - reshapedDims[i] = smallTensorDims.GetShape()[i-sizeDifference]; - } - armnn::TensorInfo reshapedInfo = smallTensorDims; - reshapedInfo.SetShape(armnn::TensorShape{ static_cast(reshapedDims.size()), - reshapedDims.data() }); - - armnn::ReshapeDescriptor reshapeDesc; - reshapeDesc.m_TargetShape = reshapedInfo.GetShape(); - armnn::IConnectableLayer* const reshapeLayer = m_Network->AddReshapeLayer(reshapeDesc); - smallTensorHandle.Connect(reshapeLayer->GetInputSlot(0)); - reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo); - - // Connect the outputs from new reshape and original input layer - reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0)); - bigTensorHandle.Connect(startLayer->GetInputSlot(1)); - } - else - { - input0.Connect(startLayer->GetInputSlot(0)); - input1.Connect(startLayer->GetInputSlot(1)); - } - + BroadcastTensor(input0, input1, startLayer, *m_Network); return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer); } else @@ -594,12 +698,12 @@ bool ModelToINetworkConverter::ConvertAdd(const V1_0::Operation& operation) } } -bool ModelToINetworkConverter::ConvertAveragePool2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertAveragePool2d(const neuralnetworks::V1_0::Operation& operation) { return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::Average); } -bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertConcatenation(const neuralnetworks::V1_0::Operation& operation) { // The first N (0..N-1) inputs are tensors. The Nth input is the concatenation axis. if (operation.inputs.size() <= 1) @@ -622,6 +726,7 @@ bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& opera return Fail("%s: Operation has no outputs", __func__); } + armnn::TensorInfo outputInfo = GetTensorInfoForOperand(*outputOperand); armnn::TensorShape outputShape = outputInfo.GetShape(); @@ -640,41 +745,15 @@ bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& opera return Fail("%s: Operation has invalid concat axis: %d", __func__, concatDim); } - // ArmNN uses Compute Library subtensors to perform concatenation - // This only works when concatenating along dimension 0 or 1 for a 4-D tensor, - // or along dimension 0 for a 3-D tensor. 
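// Worked example of the permutation trick described above (shapes are illustrative):
// concatenating two NHWC tensors [1,2,2,3] and [1,2,2,5] along dimension 3 (channels)
// is rewritten as: permute both inputs with NHWCToArmNN {0,2,3,1} to get [1,3,2,2] and
// [1,5,2,2], merge along dimension 1 to get [1,8,2,2], then permute the result back
// with ArmNNToNHWC {0,3,1,2} to get [1,2,2,8] - the same result as a direct NHWC concat.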
-    const armnn::PermutationVector* permuteVectorIn = &IdentityPermutation;
-    const armnn::PermutationVector* permuteVectorOut = &IdentityPermutation;
-
-    assert(permuteVectorOut != nullptr);
-
-    if (outputShape.GetNumDimensions() == 4) {
-        if (concatDim == 3) {
-            concatDim = 1;
-            permuteVectorIn = &NHWCToArmNN;
-            permuteVectorOut = &ArmNNToNHWC;
-            outputShape = armnnUtils::Permuted(outputShape, *permuteVectorIn);
-            outputInfo.SetShape(outputShape);
-        } else if (concatDim == 2) {
-            concatDim = 1;
-            permuteVectorIn = &SwapDim1And2;
-            permuteVectorOut = &SwapDim1And2;
-            outputShape = armnnUtils::Permuted(outputShape, *permuteVectorIn);
-            outputInfo.SetShape(outputShape);
-        }
-    }
-    else if (!(outputShape.GetNumDimensions() == 3 && concatDim == 0))
-    {
-        // Operation unsupported
-        return false;
-    }
-
     std::vector<LayerInputHandle> inputHandles;
     std::vector<armnn::TensorShape> inputShapes;
 
     inputHandles.reserve(numInputTensors);
     inputShapes.reserve(numInputTensors);
 
+    bool inputsHaveBeenReshaped = false;
+    unsigned int tensorDimensionsAdded = 0;
+
     for (uint32_t i = 0; i < numInputTensors; ++i)
     {
         const Operand* const operand = GetInputOperand(operation, i);
@@ -683,9 +762,45 @@
             return Fail("%s: Operation has invalid inputs", __func__);
         }
 
-        inputShapes.emplace_back(GetTensorShapeForOperand(*operand));
-        inputHandles.emplace_back(ConvertToLayerInputHandle(operation, i));
+        armnn::TensorShape operandShape = GetTensorShapeForOperand(*operand);
+        LayerInputHandle operandInputHandle = ConvertToLayerInputHandle(operation, i);
+
+        if (operandShape.GetNumDimensions() == 0)
+        {
+            return Fail("%s: Operands with rank 0 are not supported", __func__);
+        }
+
+        if (RequiresReshape(operandShape))
+        {
+            inputsHaveBeenReshaped = true;
+
+            armnn::TensorInfo reshapeInfo = operandInputHandle.GetTensorInfo();
+
+            // Expand the tensor to three dimensions
+            if (operandShape.GetNumDimensions() == 2)
+            {
+                reshapeInfo.SetShape(armnn::TensorShape({1, operandShape[0], operandShape[1]}));
+                tensorDimensionsAdded = 1;
+            }
+            else
+            {
+                reshapeInfo.SetShape(armnn::TensorShape({1, 1, operandShape[0]}));
+                tensorDimensionsAdded = 2;
+            }
+
+            armnn::IConnectableLayer& newReshape = AddReshapeLayer(
+                    *m_Network,
+                    operandInputHandle,
+                    reshapeInfo
+            );
+
+            // Point to the reshape operation rather than the input operation
+            operandShape = reshapeInfo.GetShape();
+            operandInputHandle = LayerInputHandle(true, &newReshape.GetOutputSlot(0), reshapeInfo);
+        }
+
+        inputShapes.emplace_back(operandShape);
+        inputHandles.emplace_back(operandInputHandle);
 
         if (!inputHandles.back().IsValid())
         {
@@ -695,9 +810,34 @@
 
     assert(inputShapes.size() == inputHandles.size());
 
+    if (inputsHaveBeenReshaped)
+    {
+        // Adjust the concatenation dimension by the amount of dimensions added (if any)
+        concatDim += tensorDimensionsAdded;
+
+        // Add extra dimensions to the output shape to reflect the addition of the reshape layers
+        if (tensorDimensionsAdded == 1)
+        {
+            outputShape = armnn::TensorShape({1, outputShape[0], outputShape[1]});
+        }
+        else if (tensorDimensionsAdded == 2)
+        {
+            outputShape = armnn::TensorShape({1, 1, outputShape[0], outputShape[1]});
+        }
+    }
+
+    // Get the pair of permutations required for the concatenation
+    std::pair<armnn::PermutationVector, armnn::PermutationVector> permutationPair =
+        std::make_pair(IdentityPermutation4D, IdentityPermutation4D);
+
+    CreatePermutationParameters(inputShapes[0].GetNumDimensions(), concatDim, permutationPair);
+
+    outputShape = 
armnnUtils::Permuted(outputShape, permutationPair.first); + outputInfo.SetShape(outputShape); + // this is no-op for identity swizzles, otherwise it replaces both // the handles and shapes with the swizzled layer output handles and shapes - SwizzleInputs(*m_Network, inputHandles, inputShapes, *permuteVectorIn); + SwizzleInputs(*m_Network, inputHandles, inputShapes, permutationPair.first); // Create an armnn merger layer descriptor - this will also perform validation on the input shapes armnn::OriginsDescriptor mergerDescriptor; @@ -746,19 +886,39 @@ bool ModelToINetworkConverter::ConvertConcatenation(const V1_0::Operation& opera inputHandles[static_cast(i)].Connect(layer->GetInputSlot(i)); } - if (permuteVectorOut != &IdentityPermutation) + // Add permutation layer and connect the output to it, the permutation becomes the output layer + armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(*m_Network, + layer->GetOutputSlot(0), + permutationPair.second); + layer = &deswizzleLayer; + + if (inputsHaveBeenReshaped) { - // Add permutation layer and connect the output to it, the permutation becomes the output layer - armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(*m_Network, - layer->GetOutputSlot(0), - *permuteVectorOut); - layer = &deswizzleLayer; + armnn::TensorInfo afterConcatInfo = layer->GetOutputSlot(0).GetTensorInfo(); + + // Undo the reshape knowing the amount of dimensions added + if (tensorDimensionsAdded == 1) + { + afterConcatInfo.SetShape(armnn::TensorShape({ afterConcatInfo.GetShape()[1], + afterConcatInfo.GetShape()[2] })); + } + else if (tensorDimensionsAdded == 2) + { + afterConcatInfo.SetShape(armnn::TensorShape({ afterConcatInfo.GetShape()[2], + afterConcatInfo.GetShape()[3] })); + } + + layer = &AddReshapeLayer( + *m_Network, + layer->GetOutputSlot(0), + afterConcatInfo + ); } return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertConv2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertConv2d(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -860,7 +1020,7 @@ bool ModelToINetworkConverter::ConvertConv2d(const V1_0::Operation& operation) } } -bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -959,8 +1119,10 @@ bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const V1_0::Operation& ope armnn::IsDepthwiseConvolutionSupported, m_Compute, swizzledInputInfo, + swizzledOutputInfo, desc, - weights.GetInfo())) + weights.GetInfo(), + bias.GetInfo())) { return false; } @@ -979,7 +1141,7 @@ bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const V1_0::Operation& ope } } -bool ModelToINetworkConverter::ConvertFloor(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertFloor(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1009,7 +1171,7 @@ bool ModelToINetworkConverter::ConvertFloor(const V1_0::Operation& operation) return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertFullyConnected(const neuralnetworks::V1_0::Operation& 
operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1026,30 +1188,39 @@ bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& oper const armnn::TensorInfo& inputInfo = input.GetTensorInfo(); const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output); - armnn::TensorInfo reshapedInfo = inputInfo; + // ArmNN does not currently support non-fixed weights or bias + ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1); // 2D + ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2); // 1D + if (!weightsPin.IsValid() || !biasPin.IsValid()) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + + armnn::ConstTensor weights = weightsPin.GetConstTensor(); + armnn::ConstTensor bias = biasPin.GetConstTensor(); + + armnn::TensorInfo reshapedInfo = inputInfo; if (inputInfo.GetNumDimensions() > 2U) { + unsigned int dim0 = inputInfo.GetShape()[0]; unsigned int dim1 = inputInfo.GetShape()[1]; + for (unsigned int i = 2U; i < inputInfo.GetNumDimensions(); ++i) { dim1 *= inputInfo.GetShape()[i]; } - reshapedInfo.SetShape(armnn::TensorShape({inputInfo.GetShape()[0], dim1})); - } - // ArmNN does not currently support non-fixed weights or bias - ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1); // 2D - ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2); // 1D + unsigned int divisor = weights.GetInfo().GetShape()[1] / dim1; + if(dim0 % divisor != 0) + { + return Fail("%s: Failed to deduce tensor shape", __func__); + } - if (!weightsPin.IsValid() || !biasPin.IsValid()) - { - return Fail("%s: Operation has invalid inputs", __func__); + reshapedInfo.SetShape(armnn::TensorShape({dim0 / divisor, dim1 * divisor})); } // ensuring that the bias value is within 1% of the weights input (small float differences can exist) - armnn::ConstTensor weights = weightsPin.GetConstTensor(); - armnn::ConstTensor bias = biasPin.GetConstTensor(); SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), reshapedInfo); ActivationFn activationFunction; @@ -1065,7 +1236,10 @@ bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& oper if (!IsLayerSupported(__func__, armnn::IsFullyConnectedSupported, m_Compute, - reshapedInfo, + inputInfo, + outputInfo, + weights.GetInfo(), + bias.GetInfo(), desc)) { return false; @@ -1100,7 +1274,7 @@ bool ModelToINetworkConverter::ConvertFullyConnected(const V1_0::Operation& oper } } -bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1158,7 +1332,7 @@ bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const V1_0::Ope return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer); } -bool ModelToINetworkConverter::ConvertLogistic(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertLogistic(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::Sigmoid; @@ -1166,7 +1340,7 @@ bool ModelToINetworkConverter::ConvertLogistic(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertL2Normalization(const V1_0::Operation& operation) +bool 
ModelToINetworkConverter::ConvertL2Normalization(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1189,7 +1363,8 @@ bool ModelToINetworkConverter::ConvertL2Normalization(const V1_0::Operation& ope if (!IsLayerSupported(__func__, armnn::IsL2NormalizationSupported, m_Compute, - swizzledInputInfo)) + swizzledInputInfo, + swizzledOutputInfo)) { return false; } @@ -1203,17 +1378,17 @@ bool ModelToINetworkConverter::ConvertL2Normalization(const V1_0::Operation& ope return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer); } -bool ModelToINetworkConverter::ConvertL2Pool2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertL2Pool2d(const neuralnetworks::V1_0::Operation& operation) { return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::L2); } -bool ModelToINetworkConverter::ConvertMaxPool2d(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertMaxPool2d(const neuralnetworks::V1_0::Operation& operation) { return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::Max); } -bool ModelToINetworkConverter::ConvertMul(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertMul(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0); LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1); @@ -1223,43 +1398,42 @@ bool ModelToINetworkConverter::ConvertMul(const V1_0::Operation& operation) return Fail("%s: Operation has invalid inputs", __func__); } + // The FuseActivation parameter is always the input index 2 + // and it should be optional ActivationFn activationFunction; - if (!GetInputActivationFunction(operation, 2, activationFunction)) + if (!GetOptionalInputActivation(operation, 2, activationFunction)) { return Fail("%s: Operation has invalid inputs", __func__); } - if (!ValidateBroadcast(m_Model, operation, 2u)) + const Operand* outputOperand = GetOutputOperand(operation, 0); + + if (outputOperand == nullptr) { - return Fail("%s is invalid due to broadcasting", __func__); + return false; } + const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand); + if (!IsLayerSupported(__func__, armnn::IsMultiplicationSupported, m_Compute, input0.GetTensorInfo(), - input1.GetTensorInfo())) - { - return false; - } - - const Operand* outputOperand = GetOutputOperand(operation, 0); - - if (outputOperand == nullptr) + input1.GetTensorInfo(), + outInfo)) { return false; } - const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand); - armnn::IConnectableLayer* const startLayer = m_Network->AddMultiplicationLayer(); armnn::IConnectableLayer* const endLayer = ProcessActivation(outInfo, activationFunction, startLayer); + const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo(); + const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo(); + if (endLayer != nullptr) { - input0.Connect(startLayer->GetInputSlot(0)); - input1.Connect(startLayer->GetInputSlot(1)); - + BroadcastTensor(input0, input1, startLayer, *m_Network); return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer); } else @@ -1268,7 +1442,7 @@ bool ModelToINetworkConverter::ConvertMul(const V1_0::Operation& operation) } } -bool ModelToINetworkConverter::ConvertReLu(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReLu(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = 
armnn::ActivationFunction::ReLu; @@ -1276,7 +1450,7 @@ bool ModelToINetworkConverter::ConvertReLu(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertReLu1(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReLu1(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::BoundedReLu; @@ -1286,7 +1460,7 @@ bool ModelToINetworkConverter::ConvertReLu1(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertReLu6(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReLu6(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::BoundedReLu; @@ -1295,7 +1469,7 @@ bool ModelToINetworkConverter::ConvertReLu6(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertSoftmax(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1303,6 +1477,14 @@ bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) return Fail("%s: Operation has invalid inputs", __func__); } + const Operand* outputOperand = GetOutputOperand(operation, 0); + if (!outputOperand) + { + return Fail("%s: Operation has no outputs", __func__); + } + + const armnn::TensorInfo outInfo = GetTensorInfoForOperand(*outputOperand); + armnn::SoftmaxDescriptor desc; if (!GetInputFloat32(operation, 1, desc.m_Beta)) { @@ -1313,6 +1495,7 @@ bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) armnn::IsSoftmaxSupported, m_Compute, input.GetTensorInfo(), + outInfo, desc)) { return false; @@ -1325,7 +1508,7 @@ bool ModelToINetworkConverter::ConvertSoftmax(const V1_0::Operation& operation) return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertTanH(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertTanH(const neuralnetworks::V1_0::Operation& operation) { armnn::ActivationDescriptor desc; desc.m_Function = armnn::ActivationFunction::TanH; @@ -1335,7 +1518,7 @@ bool ModelToINetworkConverter::ConvertTanH(const V1_0::Operation& operation) return ConvertToActivation(operation, __func__, desc); } -bool ModelToINetworkConverter::ConvertReshape(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertReshape(const neuralnetworks::V1_0::Operation& operation) { const Operand* inputOperand = GetInputOperand(operation, 0); const Operand* requestedShapeOperand = GetInputOperand(operation, 1); @@ -1403,7 +1586,7 @@ bool ModelToINetworkConverter::ConvertReshape(const V1_0::Operation& operation) return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertResizeBilinear(const V1_0::Operation& operation) +bool ModelToINetworkConverter::ConvertResizeBilinear(const neuralnetworks::V1_0::Operation& operation) { LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); if (!input.IsValid()) @@ -1449,7 +1632,307 @@ bool ModelToINetworkConverter::ConvertResizeBilinear(const V1_0::Operation& oper } -bool ModelToINetworkConverter::ConvertToActivation(const V1_0::Operation& operation, +bool 
ModelToINetworkConverter::ConvertLstm(const neuralnetworks::V1_0::Operation& operation) +{ + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + LayerInputHandle input = ConvertToLayerInputHandle(operation, 0); + if (!input.IsValid()) + { + return Fail("%s: Could not read input 0: input", __func__); + } + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + LayerInputHandle outputStateIn = ConvertToLayerInputHandle(operation, 18); + if (!outputStateIn.IsValid()) + { + return Fail("%s: Could not read input 18: outputStateIn", __func__); + } + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + LayerInputHandle cellStateIn = ConvertToLayerInputHandle(operation, 19); + if (!cellStateIn.IsValid()) + { + return Fail("%s: Could not read input 19: cellStateIn", __func__); + } + + // Get the mandatory input tensors: + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToForgetWeightsPin = ConvertOperationInputToConstTensorPin(operation, 2); + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. + const ConstTensorPin inputToCellWeightsPin = ConvertOperationInputToConstTensorPin(operation, 3); + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + const ConstTensorPin inputToOutputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 4); + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToForgetWeightsPin = ConvertOperationInputToConstTensorPin(operation, 6); + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToCellWeightsPin = ConvertOperationInputToConstTensorPin(operation, 7); + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + const ConstTensorPin recurrentToOutputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 8); + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin forgetGateBiasPin = ConvertOperationInputToConstTensorPin(operation, 13); + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellBiasPin = ConvertOperationInputToConstTensorPin(operation, 14); + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. 
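// How the tensor pins here behave for optional inputs, based on the ConstTensorPin
// additions earlier in this patch ('operation' and input index 9 are only examples):
//
//     ConstTensorPin pin = ConvertOperationInputToConstTensorPin(operation, 9);
//     if (!pin.IsValid() && !pin.IsOptional())
//     {
//         // a mandatory tensor is missing, so the operation must be rejected
//     }
//     const armnn::ConstTensor* tensor = pin.GetConstTensorPtr();
//     // nullptr here means "optional input not provided", which is the representation
//     // armnn::LstmInputParams expects for absent weights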
+ const ConstTensorPin outputGateBiasPin = ConvertOperationInputToConstTensorPin(operation, 15); + + if (!inputToForgetWeightsPin.IsValid() || + !inputToCellWeightsPin.IsValid() || + !inputToOutputWeightsPin.IsValid() || + !recurrentToForgetWeightsPin.IsValid() || + !recurrentToCellWeightsPin.IsValid() || + !recurrentToOutputWeightsPin.IsValid() || + !forgetGateBiasPin.IsValid() || + !cellBiasPin.IsValid() || + !outputGateBiasPin.IsValid()) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Get the optional input tensors: + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + const ConstTensorPin inputToInputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 1); + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + const ConstTensorPin recurrentToInputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 5); + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToInputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 9); + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToForgetWeightsPin = ConvertOperationInputToConstTensorPin(operation, 10); + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin cellToOutputWeightsPin = ConvertOperationInputToConstTensorPin(operation, 11); + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + const ConstTensorPin inputGateBiasPin = ConvertOperationInputToConstTensorPin(operation, 12); + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + const ConstTensorPin projectionWeightsPin = ConvertOperationInputToConstTensorPin(operation, 16); + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + const ConstTensorPin projectionBiasPin = ConvertOperationInputToConstTensorPin(operation, 17); + + if ((!inputToInputWeightsPin.IsValid() && !inputToInputWeightsPin.IsOptional()) || + (!recurrentToInputWeightsPin.IsValid() && !recurrentToInputWeightsPin.IsOptional()) || + (!cellToInputWeightsPin.IsValid() && !cellToInputWeightsPin.IsOptional()) || + (!cellToForgetWeightsPin.IsValid() && !cellToForgetWeightsPin.IsOptional()) || + (!cellToOutputWeightsPin.IsValid() && !cellToOutputWeightsPin.IsOptional()) || + (!inputGateBiasPin.IsValid() && !inputGateBiasPin.IsOptional()) || + (!projectionWeightsPin.IsValid() && !projectionWeightsPin.IsOptional()) || + (!projectionBiasPin.IsValid() && !projectionBiasPin.IsOptional())) + { + return Fail("%s: Operation has invalid tensor inputs", __func__); + } + + // Get the mandatory input scalars (actually 1-D tensors of size 1): + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. 
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + ActivationFn activation; + float cellClip; + float projClip; + if (!GetInputActivationFunctionFromTensor(operation, 20, activation) || + !GetInputScalar(operation, 21, OperandType::FLOAT32, cellClip) || + !GetInputScalar(operation, 22, OperandType::FLOAT32, projClip)) + { + return Fail("%s: Operation has invalid scalar inputs", __func__); + } + + // Outputs: + // 00: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + const Operand* scratchBuffer = GetOutputOperand(operation, 0); + if (!scratchBuffer) + { + return Fail("%s: Could not read output 0: scratchBuffer", __func__); + } + // 01: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + const Operand* outputStateOut = GetOutputOperand(operation, 1); + if (!outputStateOut) + { + return Fail("%s: Could not read output 1: outputStateOut", __func__); + } + // 02: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + const Operand* cellStateOut = GetOutputOperand(operation, 2); + if (!cellStateOut) + { + return Fail("%s: Could not read output 2: cellStateOut", __func__); + } + // 03: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. 
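// Worked shape example for the outputs documented above (values are illustrative):
// with batch_size = 2, num_units = 4, output_size = 4 and CIFG disabled, the four
// outputs are scratchBuffer [2, 16] (num_units * 4), outputStateOut [2, 4],
// cellStateOut [2, 4] and output [2, 4]; with CIFG enabled the scratch buffer
// shrinks to [2, 12] (num_units * 3).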
+ const Operand* output = GetOutputOperand(operation, 3); + if (!output) + { + return Fail("%s: Could not read output 3: output", __func__); + } + + // set the params structure for the AddLstmLayer call + armnn::LstmInputParams params; + params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr(); + params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr(); + params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr(); + params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr(); + params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr(); + params.m_CellToInputWeights = cellToInputWeightsPin.GetConstTensorPtr(); + params.m_CellToForgetWeights = cellToForgetWeightsPin.GetConstTensorPtr(); + params.m_CellToOutputWeights = cellToOutputWeightsPin.GetConstTensorPtr(); + params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr(); + params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr(); + params.m_CellBias = cellBiasPin.GetConstTensorPtr(); + params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr(); + params.m_ProjectionWeights = projectionWeightsPin.GetConstTensorPtr(); + params.m_ProjectionBias = projectionBiasPin.GetConstTensorPtr(); + + // set the layer descriptor + armnn::LstmDescriptor desc; + desc.m_ActivationFunc = activation; + desc.m_ClippingThresCell = cellClip; + desc.m_ClippingThresProj = projClip; + desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr || + params.m_RecurrentToInputWeights == nullptr || + params.m_InputGateBias == nullptr); + desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr || + params.m_CellToOutputWeights != nullptr); + desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr); + + // validate the optional input groups + if (desc.m_CifgEnabled && + (params.m_InputToInputWeights != nullptr || + params.m_RecurrentToInputWeights != nullptr || + params.m_InputGateBias != nullptr)) + { + return Fail("%s: All, or none, of input-to-input weights, recurrent-to-input weights," + " and input gate bias must be provided", __func__); + } + + if (!desc.m_ProjectionEnabled && params.m_ProjectionBias != nullptr) + { + return Fail("%s: projection bias should not be provided without projection weights", __func__); + } + + if (desc.m_PeepholeEnabled && + (params.m_CellToForgetWeights == nullptr || + params.m_CellToOutputWeights == nullptr || + (!desc.m_CifgEnabled && params.m_CellToInputWeights == nullptr))) + { + return Fail("%s: All, or none, of cell-to-forget weights and cell-to-output weights must be provided" + " and, if CIFG is not enabled, cell-to-input weights must also be provided", __func__); + } + + // Check if the layer is supported + // Inputs + const armnn::TensorInfo& inputInfo = input.GetTensorInfo(); + const armnn::TensorInfo& outputStateInInfo = outputStateIn.GetTensorInfo(); + const armnn::TensorInfo& cellStateInInfo = cellStateIn.GetTensorInfo(); + + // Outputs + const armnn::TensorInfo& scratchBufferInfo = GetTensorInfoForOperand(*scratchBuffer); + const armnn::TensorInfo& outputStateOutInfo = GetTensorInfoForOperand(*outputStateOut); + const armnn::TensorInfo& cellStateOutInfo = GetTensorInfoForOperand(*cellStateOut); + const armnn::TensorInfo& outputInfo = 
GetTensorInfoForOperand(*output); + + // Basic parameters + const armnn::TensorInfo& inputToForgetWeights = params.m_InputToForgetWeights->GetInfo(); + const armnn::TensorInfo& inputToCellWeights = params.m_InputToCellWeights->GetInfo(); + const armnn::TensorInfo& inputToOutputWeights = params.m_InputToOutputWeights->GetInfo(); + const armnn::TensorInfo& recurrentToForgetWeights = params.m_RecurrentToForgetWeights->GetInfo(); + const armnn::TensorInfo& recurrentToCellWeights = params.m_RecurrentToCellWeights->GetInfo(); + const armnn::TensorInfo& recurrentToOutputWeights = params.m_RecurrentToOutputWeights->GetInfo(); + const armnn::TensorInfo& forgetGateBias = params.m_ForgetGateBias->GetInfo(); + const armnn::TensorInfo& cellBias = params.m_CellBias->GetInfo(); + const armnn::TensorInfo& outputGateBias = params.m_OutputGateBias->GetInfo(); + + //Optional parameters + const armnn::TensorInfo* inputToInputWeights = nullptr; + const armnn::TensorInfo* recurrentToInputWeights = nullptr; + const armnn::TensorInfo* cellToInputWeights = nullptr; + const armnn::TensorInfo* inputGateBias = nullptr; + const armnn::TensorInfo* projectionWeights = nullptr; + const armnn::TensorInfo* projectionBias = nullptr; + const armnn::TensorInfo* cellToForgetWeights = nullptr; + const armnn::TensorInfo* cellToOutputWeights = nullptr; + + if(!desc.m_CifgEnabled) + { + inputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); + recurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); + if (params.m_CellToInputWeights != nullptr) + { + cellToInputWeights = &(params.m_CellToInputWeights->GetInfo()); + } + inputGateBias = &(params.m_InputGateBias->GetInfo()); + } + + if(desc.m_ProjectionEnabled) + { + projectionWeights = &(params.m_ProjectionWeights->GetInfo()); + if (params.m_ProjectionBias != nullptr) + { + projectionBias = &(params.m_ProjectionBias->GetInfo()); + } + } + + if(desc.m_PeepholeEnabled) + { + cellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo()); + cellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo()); + } + + if (!IsLayerSupported(__func__, + armnn::IsLstmSupported, + m_Compute, + inputInfo, + outputStateInInfo, + cellStateInInfo, + scratchBufferInfo, + outputStateOutInfo, + cellStateOutInfo, + outputInfo, + desc, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + forgetGateBias, + cellBias, + outputGateBias, + inputToInputWeights, + recurrentToInputWeights, + cellToInputWeights, + inputGateBias, + projectionWeights, + projectionBias, + cellToForgetWeights, + cellToOutputWeights)) + { + return false; + } + + // Add the layer + armnn::IConnectableLayer* layer = m_Network->AddLstmLayer(desc, params, "Lstm"); + + input.Connect(layer->GetInputSlot(0)); + outputStateIn.Connect(layer->GetInputSlot(1)); + cellStateIn.Connect(layer->GetInputSlot(2)); + + return (SetupAndTrackLayerOutputSlot(operation, 0, *layer, 0) && + SetupAndTrackLayerOutputSlot(operation, 1, *layer, 1) && + SetupAndTrackLayerOutputSlot(operation, 2, *layer, 2) && + SetupAndTrackLayerOutputSlot(operation, 3, *layer, 3)); +} + +bool ModelToINetworkConverter::ConvertToActivation(const neuralnetworks::V1_0::Operation& operation, const char* operationName, const armnn::ActivationDescriptor& activationDesc) { @@ -1459,10 +1942,17 @@ bool ModelToINetworkConverter::ConvertToActivation(const V1_0::Operation& operat return Fail("%s: Input 0 is invalid", operationName); } + const Operand* 
outputOperand = GetOutputOperand(operation, 0); + if (!outputOperand) + { + return false; + } + const armnn::TensorInfo outInfo = GetTensorInfoForOperand(*outputOperand); if (!IsLayerSupported(__func__, armnn::IsActivationSupported, m_Compute, input.GetTensorInfo(), + outInfo, activationDesc)) { return false; @@ -1475,7 +1965,7 @@ bool ModelToINetworkConverter::ConvertToActivation(const V1_0::Operation& operat return SetupAndTrackLayerOutputSlot(operation, 0, *layer); } -bool ModelToINetworkConverter::ConvertPooling2d(const V1_0::Operation& operation, +bool ModelToINetworkConverter::ConvertPooling2d(const neuralnetworks::V1_0::Operation& operation, const char* operationName, armnn::PoolingAlgorithm poolType) { @@ -1625,7 +2115,8 @@ const void* ModelToINetworkConverter::GetOperandValueReadOnlyAddress(const Opera return valueStart; } -const Operand* ModelToINetworkConverter::GetInputOperand(const V1_0::Operation& operation, uint32_t inputIndex) const +const Operand* ModelToINetworkConverter::GetInputOperand(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex) const { if (inputIndex >= operation.inputs.size()) { @@ -1637,7 +2128,8 @@ const Operand* ModelToINetworkConverter::GetInputOperand(const V1_0::Operation& return &m_Model.operands[operation.inputs[inputIndex]]; } -const Operand* ModelToINetworkConverter::GetOutputOperand(const V1_0::Operation& operation, uint32_t outputIndex) const +const Operand* ModelToINetworkConverter::GetOutputOperand(const neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex) const { if (outputIndex >= operation.outputs.size()) { @@ -1650,7 +2142,7 @@ const Operand* ModelToINetworkConverter::GetOutputOperand(const V1_0::Operation& } template -bool ModelToINetworkConverter::GetInputScalar(const V1_0::Operation& operation, uint32_t inputIndex, +bool ModelToINetworkConverter::GetInputScalar(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, OperandType type, T& outValue) const { const Operand* operand = GetInputOperand(operation, inputIndex); @@ -1681,33 +2173,75 @@ bool ModelToINetworkConverter::GetInputScalar(const V1_0::Operation& operation, return true; } -bool ModelToINetworkConverter::GetInputInt32(const V1_0::Operation& operation, +bool ModelToINetworkConverter::GetInputInt32(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, int32_t& outValue) const { return GetInputScalar(operation, inputIndex, OperandType::INT32, outValue); } -bool ModelToINetworkConverter::GetInputFloat32(const V1_0::Operation& operation, +bool ModelToINetworkConverter::GetInputFloat32(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, float& outValue) const { return GetInputScalar(operation, inputIndex, OperandType::FLOAT32, outValue); } -bool ModelToINetworkConverter::GetInputActivationFunction(const V1_0::Operation& operation, - uint32_t inputIndex, - ActivationFn& outActivationFunction) const +bool ModelToINetworkConverter::GetInputActivationFunctionImpl(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + OperandType type, + ActivationFn& outActivationFunction) const { + if (type != OperandType::INT32 && type != OperandType::TENSOR_INT32) + { + return Fail("%s: unexpected operand type: %s (should be %s or %s)", + __func__, + toString(type).c_str(), + toString(OperandType::INT32).c_str(), + toString(OperandType::TENSOR_INT32).c_str()); + } + int32_t activationFunctionAsInt; - if (!GetInputInt32(operation, inputIndex, activationFunctionAsInt)) + if 
(!GetInputScalar(operation, inputIndex, type, activationFunctionAsInt)) { return Fail("%s: failed to get activation input value", __func__); } - outActivationFunction = static_cast(activationFunctionAsInt); return true; } -bool ModelToINetworkConverter::GetInputPaddingScheme(const V1_0::Operation& operation, +bool ModelToINetworkConverter::GetInputActivationFunction(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const +{ + return GetInputActivationFunctionImpl(operation, inputIndex, OperandType::INT32, outActivationFunction); +} + +bool ModelToINetworkConverter::GetInputActivationFunctionFromTensor(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const +{ + // This only accepts a 1-D tensor of size 1 + return GetInputActivationFunctionImpl(operation, inputIndex, OperandType::INT32, outActivationFunction); +} + +bool ModelToINetworkConverter::GetOptionalInputActivation(const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& activationFunction) const +{ + if (operation.inputs.size() <= inputIndex) + { + activationFunction = ActivationFn::kActivationNone; + } + else + { + if (!GetInputActivationFunction(operation, inputIndex, activationFunction)) + { + return Fail("%s: Operation has invalid inputs", __func__); + } + } + return true; +} + +bool ModelToINetworkConverter::GetInputPaddingScheme(const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, android::nn::PaddingScheme& outPaddingScheme) const { @@ -1722,7 +2256,7 @@ bool ModelToINetworkConverter::GetInputPaddingScheme(const V1_0::Operation& oper } LayerInputHandle ModelToINetworkConverter::ConvertToLayerInputHandle( - const V1_0::Operation& operation, + const neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex) { const Operand* operand = GetInputOperand(operation, inputIndex); @@ -1791,22 +2325,22 @@ LayerInputHandle ModelToINetworkConverter::ConvertToLayerInputHandle( } } -ConstTensorPin ModelToINetworkConverter::ConvertOperationInputToConstTensorPin(const V1_0::Operation& operation, - uint32_t inputIndex, const armnn::PermutationVector& dimensionMappings, - const armnn::TensorShape* overrideTensorShape) +ConstTensorPin ModelToINetworkConverter::ConvertOperationInputToConstTensorPin( + const neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, const armnn::PermutationVector& dimensionMappings, + const armnn::TensorShape* overrideTensorShape, bool optional) { const Operand* operand = GetInputOperand(operation, inputIndex); if (!operand) { - Fail("%s: failed to get input operand", __func__); + Fail("%s: failed to get input operand: index=%u", __func__, inputIndex); return ConstTensorPin(); } - - return ConvertOperandToConstTensorPin(*operand, dimensionMappings, overrideTensorShape); + return ConvertOperandToConstTensorPin(*operand, dimensionMappings, overrideTensorShape, optional); } ConstTensorPin ModelToINetworkConverter::ConvertOperandToConstTensorPin(const Operand& operand, - const armnn::PermutationVector& dimensionMappings, const armnn::TensorShape* overrideTensorShape) + const armnn::PermutationVector& dimensionMappings, const armnn::TensorShape* overrideTensorShape, bool optional) { if (!IsOperandTypeSupportedForTensors(operand.type)) { @@ -1823,6 +2357,12 @@ ConstTensorPin ModelToINetworkConverter::ConvertOperandToConstTensorPin(const Op const void* const valueStart = GetOperandValueReadOnlyAddress(operand); if (!valueStart) { + if 
(optional) + { + // optional tensor with no values is not really an error; return it as invalid, but marked as optional + return ConstTensorPin(true); + } + // mandatory tensor with no values Fail("%s: failed to get operand address", __func__); return ConstTensorPin(); } @@ -1919,7 +2459,7 @@ armnn::IConnectableLayer* ModelToINetworkConverter::ProcessActivation(const armn } if (!IsLayerSupported(__func__, armnn::IsActivationSupported, m_Compute, - prevLayer->GetOutputSlot(0).GetTensorInfo(), activationDesc)) + prevLayer->GetOutputSlot(0).GetTensorInfo(), tensorInfo, activationDesc)) { return nullptr; } @@ -1933,19 +2473,21 @@ armnn::IConnectableLayer* ModelToINetworkConverter::ProcessActivation(const armn return activationLayer; } -bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const V1_0::Operation& operation, uint32_t outputIndex, - armnn::IConnectableLayer& layer) +bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const neuralnetworks::V1_0::Operation& operation, + uint32_t operationOutputIndex, + armnn::IConnectableLayer& layer, + uint32_t layerOutputIndex) { - const Operand* outputOperand = GetOutputOperand(operation, outputIndex); + const Operand* outputOperand = GetOutputOperand(operation, operationOutputIndex); - if ((outputOperand == nullptr) || (outputIndex >= layer.GetNumOutputSlots())) + if ((outputOperand == nullptr) || (operationOutputIndex >= layer.GetNumOutputSlots())) { return false; } - armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(outputIndex); + armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(layerOutputIndex); - const uint32_t operandIndex = operation.outputs[outputIndex]; + const uint32_t operandIndex = operation.outputs[operationOutputIndex]; m_OutputSlotForOperand[operandIndex] = &outputSlot; outputSlot.SetTensorInfo(GetTensorInfoForOperand(*outputOperand)); @@ -1953,6 +2495,13 @@ bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const V1_0::Operatio return true; } +bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex, + armnn::IConnectableLayer& layer) +{ + return SetupAndTrackLayerOutputSlot(operation, outputIndex, layer, outputIndex); +} + bool ModelToINetworkConverter::IsOperationSupported(uint32_t operationIndex) const { std::map::const_iterator it = m_OperationSupported.find(operationIndex); diff --git a/ModelToINetworkConverter.hpp b/ModelToINetworkConverter.hpp index 864a2fcc..f0e28970 100644 --- a/ModelToINetworkConverter.hpp +++ b/ModelToINetworkConverter.hpp @@ -5,12 +5,11 @@ #pragma once -#include "HalInterfaces.h" -#include "NeuralNetworks.h" -#include "ActivationFunctor.h" - #include "ArmnnDriver.hpp" +#include +#include + #include #include #include @@ -39,7 +38,8 @@ enum class ConversionResult class ModelToINetworkConverter { public: - ModelToINetworkConverter(armnn::Compute compute, const V1_0::Model& model, + ModelToINetworkConverter(armnn::Compute compute, + const ::android::hardware::neuralnetworks::V1_0::Model& model, const std::set& forcedUnsupportedOperations); ConversionResult GetConversionResult() const { return m_ConversionResult; } @@ -52,82 +52,108 @@ public: private: void Convert(); - bool ConvertOperation(const V1_0::Operation& operation); + bool ConvertOperation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertAdd(const V1_0::Operation& operation); + bool ConvertAdd(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertAveragePool2d(const 
V1_0::Operation& operation); + bool ConvertAveragePool2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertConcatenation(const V1_0::Operation& operation); + bool ConvertConcatenation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertConv2d(const V1_0::Operation& operation); + bool ConvertConv2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertDepthwiseConv2d(const V1_0::Operation& operation); + bool ConvertDepthwiseConv2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertFloor(const V1_0::Operation& operation); + bool ConvertFloor(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertFullyConnected(const V1_0::Operation& operation); + bool ConvertFullyConnected(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertLogistic(const V1_0::Operation& operation); + bool ConvertLogistic(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertLocalResponseNormalization(const V1_0::Operation& operation); + bool ConvertLocalResponseNormalization(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertL2Normalization(const V1_0::Operation& operation); + bool ConvertL2Normalization(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertL2Pool2d(const V1_0::Operation& operation); + bool ConvertL2Pool2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertMaxPool2d(const V1_0::Operation& operation); + bool ConvertMaxPool2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertMul(const V1_0::Operation& operation); + bool ConvertMul(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReLu(const V1_0::Operation& operation); + bool ConvertReLu(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReLu1(const V1_0::Operation& operation); + bool ConvertReLu1(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReLu6(const V1_0::Operation& operation); + bool ConvertReLu6(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertSoftmax(const V1_0::Operation& operation); + bool ConvertSoftmax(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertTanH(const V1_0::Operation& operation); + bool ConvertTanH(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertReshape(const V1_0::Operation& operation); + bool ConvertReshape(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertResizeBilinear(const V1_0::Operation& operation); + bool ConvertResizeBilinear(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); - bool ConvertToActivation(const V1_0::Operation& operation, const char* operationName, + bool ConvertLstm(const ::android::hardware::neuralnetworks::V1_0::Operation& operation); + + bool ConvertToActivation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + const char* operationName, const armnn::ActivationDescriptor& activationDesc); - bool ConvertPooling2d(const V1_0::Operation& operation, const char* name, armnn::PoolingAlgorithm poolType); + bool ConvertPooling2d(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + const char* name, armnn::PoolingAlgorithm poolType); 
const void* GetOperandValueReadOnlyAddress(const Operand& operand) const; - const Operand* GetInputOperand(const V1_0::Operation& operation, uint32_t inputIndex) const; + const Operand* GetInputOperand(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex) const; - const Operand* GetOutputOperand(const V1_0::Operation& operation, uint32_t outputIndex) const; + const Operand* GetOutputOperand(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex) const; template - bool GetInputScalar(const V1_0::Operation& operation, uint32_t inputIndex, OperandType type, T& outValue) const; + bool GetInputScalar(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + OperandType type, T& outValue) const; - bool GetInputInt32(const V1_0::Operation& operation, uint32_t inputIndex, int32_t& outValue) const; + bool GetInputInt32(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + int32_t& outValue) const; - bool GetInputFloat32(const V1_0::Operation& operation, uint32_t inputIndex, float& outValue) const; + bool GetInputFloat32(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + float& outValue) const; - bool GetInputActivationFunction(const V1_0::Operation& operation, uint32_t inputIndex, - ActivationFn& outActivationFunction) const; + bool GetInputActivationFunctionImpl(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + OperandType type, + ActivationFn& outActivationFunction) const; - bool GetInputPaddingScheme(const V1_0::Operation& operation, uint32_t inputIndex, - android::nn::PaddingScheme& outPaddingScheme) const; + bool GetInputActivationFunction(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const; - LayerInputHandle ConvertToLayerInputHandle(const V1_0::Operation& operation, uint32_t inputIndex); + bool GetInputActivationFunctionFromTensor(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& outActivationFunction) const; - ConstTensorPin ConvertOperationInputToConstTensorPin(const V1_0::Operation& operation, uint32_t inputIndex, - const armnn::PermutationVector& dimensionMappings = g_DontPermute, - const armnn::TensorShape* overrideTensorShape = nullptr); + bool GetOptionalInputActivation(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + ActivationFn& activationFunction) const; + + bool GetInputPaddingScheme(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex, + android::nn::PaddingScheme& outPaddingScheme) const; + + LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t inputIndex); + + ConstTensorPin ConvertOperationInputToConstTensorPin( + const ::android::hardware::neuralnetworks::V1_0::Operation& operation, uint32_t inputIndex, + const armnn::PermutationVector& dimensionMappings = g_DontPermute, + const armnn::TensorShape* overrideTensorShape = nullptr, bool optional = false); ConstTensorPin ConvertOperandToConstTensorPin(const Operand& operand, const armnn::PermutationVector& dimensionMappings = g_DontPermute, - const armnn::TensorShape* overrideTensorShape = nullptr); + const armnn::TensorShape* overrideTensorShape = nullptr, bool optional = false); bool 
GetTensorInt32Values(const Operand& operand, std::vector& outValues) const; @@ -135,20 +161,25 @@ private: armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo, ActivationFn activation, armnn::IConnectableLayer* prevLayer); + bool SetupAndTrackLayerOutputSlot(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t operationOutputIndex, + armnn::IConnectableLayer& layer, + uint32_t layerOutputIndex); - bool SetupAndTrackLayerOutputSlot(const V1_0::Operation& operation, uint32_t outputIndex, + bool SetupAndTrackLayerOutputSlot(const ::android::hardware::neuralnetworks::V1_0::Operation& operation, + uint32_t outputIndex, armnn::IConnectableLayer& layer); // Input data - armnn::Compute m_Compute; - const V1_0::Model& m_Model; - const std::set& m_ForcedUnsupportedOperations; + armnn::Compute m_Compute; + const ::android::hardware::neuralnetworks::V1_0::Model& m_Model; + const std::set& m_ForcedUnsupportedOperations; // Output data - armnn::INetworkPtr m_Network; - ConversionResult m_ConversionResult; - std::map m_OperationSupported; + armnn::INetworkPtr m_Network; + ConversionResult m_ConversionResult; + std::map m_OperationSupported; // Working/intermediate data std::vector m_OutputSlotForOperand; diff --git a/NnapiSupport.txt b/NnapiSupport.txt index 8973d901..de2e4071 100644 --- a/NnapiSupport.txt +++ b/NnapiSupport.txt @@ -1,6 +1,7 @@ ------ ArmNN for Android NNAPI supported operations ------ This release of ArmNN for Android supports use as a driver for the Android Neural Networks API. It implements the android.hardware.neuralnetworks@1.0 interface. +android.hardware.neuralnetworks@1.1 models available in Android P can be executed if the utility method compliantWithV1_0(model) returns true for them. For more information on the Android Neural Networks API, see https://developer.android.com/ndk/guides/neuralnetworks/index.html @@ -31,21 +32,35 @@ RESHAPE (FLOAT32,QUANT8_ASYMM) RESIZE_BILINEAR (FLOAT32) SOFTMAX (FLOAT32,QUANT8_ASYMM) TANH (FLOAT32) +LSTM (FLOAT32) * Depthwise convolution only supports a value of 1 for the depth multiplier. In addition, the QUANT8_ASYMM version only supports 3x3 kernels. --- Unsupported operators --- -The following AndroidNN operations are currently not supported. +The following AndroidNN 1.0 operations are currently not supported. DEPTH_TO_SPACE DEQUANTIZE EMBEDDING_LOOKUP HASHTABLE_LOOKUP LSH_PROJECTION -LSTM RNN SPACE_TO_DEPTH SVDF Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework appropriately and the framework implements those operations using a CPU implementation. + +The following AndroidNN 1.1 operations are currently not supported. + +BATCH_TO_SPACE_ND +DIV +MEAN +PAD +SPACE_TO_BATCH_ND +SQUEEZE +STRIDED_SLICE +SUB +TRANSPOSE + +Where any of these operations are present, compliantWithV1_0(model) will return false for the model and the driver will not currently be utilised. diff --git a/README.md b/README.md index 4f780e7a..9939cf1b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ArmNN Android Neural Networks driver -This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0 HAL. +This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0 HAL and the android.hardware.neuralnetworks@1.1 HAL.
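As a rough sketch of the compliantWithV1_0(model) fallback described in NnapiSupport.txt above: a 1.1 driver can answer 1.1 calls by downgrading compliant models and reusing its 1.0 code path. The snippet below is illustrative only, assuming the android::nn::compliantWithV1_0() and android::nn::convertToV1_0() helpers from the ML framework's Utils.h; it is not the literal driver source.

    // Illustrative only: route a V1_1 model through the existing V1_0 support check.
    Return<void> ArmnnDriver::getSupportedOperations_1_1(const V1_1::Model& model,
                                                         getSupportedOperations_1_1_cb cb)
    {
        if (!android::nn::compliantWithV1_0(model))
        {
            // The model uses 1.1-only operations (e.g. SUB, MEAN), so nothing is supported.
            cb(ErrorStatus::NONE, std::vector<bool>(model.operations.size(), false));
            return Void();
        }

        // Downgrade the compliant model and reuse the 1.0 implementation.
        return getSupportedOperations(android::nn::convertToV1_0(model), cb);
    }

prepareModel_1_1 can take the same route, converting the model before handing it to the 1.0 prepare path.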
For more information about supported operations and configurations, see NnapiSupport.txt @@ -20,34 +20,50 @@ is built and copied to the `system/vendor/bin/hw` directory in the Android image To update the build environment, add to the contents of the variable `PRODUCT_PACKAGES` within the device-specific makefile that is located in the `/device//` directory. This file is normally called `device.mk`: + +For Android O or Android P, using NN API version 1.0, the following should be added to `device.mk`:
 PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.0-service-armnn
 
+For Android P, a newer version of the NN API (1.1) is available, +so the following should be added to `device.mk` instead: +
+PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.1-service-armnn
+
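If a single `device.mk` is shared between Android versions, one possible approach (a sketch, not part of this patch) is to select the driver flavour with the same `PLATFORM_VERSION` test that the patch itself uses in `Android.mk`:

    # Hypothetical device.mk fragment: Android P (platform version 9) gets the 1.1
    # service, everything older falls back to the 1.0 service.
    ifeq ($(PLATFORM_VERSION),9)
    PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.1-service-armnn
    else
    PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.0-service-armnn
    endif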
+`Android.mk` contains the module definitions of both versions of the ArmNN driver. 4. Build Android as normal, i.e. run `make` in `` 5. To confirm that the ArmNN driver has been built, check for the driver service executable at
-/out/target/product//system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
+/out/target/product/system/vendor/bin/hw
 
+For example, if the ArmNN driver has been built with NN API version 1.0, check for the following file: +
+/out/target/product/system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
+
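A quick way to check directly on a connected device (rather than in the output tree) is to list the installed HAL binaries; the install path matches the one used by the service's .rc file:

    adb shell ls /vendor/bin/hw | grep armnn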
+ +Please note: Android O is only compatible with NN API version 1.0. ### Testing -1. Run the ArmNN driver service executable in the background +1. Run the ArmNN driver service executable in the background. +The following examples assume that the 1.0 version of the driver is being used:
 adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn &
 
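For the 1.1 driver the command is the same apart from the binary name, which matches the android.hardware.neuralnetworks@1.1-service-armnn.rc file added by this patch:

    adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn &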
2. Run some code that exercises the Android Neural Networks API, for example Android's `NeuralNetworksTest` unit tests (note this is an optional component that must be built).
-adb shell /data/nativetest/NeuralNetworksTest/NeuralNetworksTest > NeuralNetworkTest.log
+adb shell /data/nativetest/NeuralNetworksTest_static/NeuralNetworksTest_static > NeuralNetworkTest.log
 
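NeuralNetworksTest is a gtest-based binary, so when iterating on a single operation it is usually possible to run just a subset of tests with a gtest filter (the filter value below is only an example):

    adb shell /data/nativetest/NeuralNetworksTest_static/NeuralNetworksTest_static --gtest_filter=TrivialTest.* > NeuralNetworkTest.log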
3. To confirm that the ArmNN driver is being used to service the Android Neural Networks API requests, check for messages in logcat with the `ArmnnDriver` tag. -### Using ClTuner +### Using the GPU tuner -ClTuner is a feature of the Compute Library that finds optimum values for OpenCL tuning parameters. The recommended way of using it with ArmNN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation: +The GPU tuner is a feature of the Compute Library that finds optimum values for GPU acceleration tuning parameters. The recommended way of using it with ArmNN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation: -1. Run the ArmNN driver service executable in tuning mode. The path to the tuning data must be writable by the service: +1. Run the ArmNN driver service executable in tuning mode. The path to the tuning data must be writable by the service. +The following examples assume that the 1.0 version of the driver is being used:
 adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file <PATH_TO_TUNING_DATA> --cl-tuned-parameters-mode UpdateTunedParameters &
 
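Once tuning data has been generated, the service can be restarted pointing at the same file but without the update mode, so the tuned parameters are only read back (a sketch; this assumes the driver's default tuned-parameters mode reads rather than updates the file):

    adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file <PATH_TO_TUNING_DATA> &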
diff --git a/RequestThread.hpp b/RequestThread.hpp index 0983793e..2448dbec 100644 --- a/RequestThread.hpp +++ b/RequestThread.hpp @@ -10,8 +10,9 @@ #include #include -#include "CpuExecutor.h" -#include "HalInterfaces.h" +#include "ArmnnDriver.hpp" + +#include #include namespace armnn_driver diff --git a/SystemPropertiesUtils.hpp b/SystemPropertiesUtils.hpp index 57aa98ca..e27c5ca6 100644 --- a/SystemPropertiesUtils.hpp +++ b/SystemPropertiesUtils.hpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace { template @@ -80,4 +81,4 @@ T ParseSystemProperty(const char* name, T defaultValue) ALOGD("%s", messageBuilder.str().c_str()); return defaultValue; } -} //namespace \ No newline at end of file +} //namespace diff --git a/Utils.cpp b/Utils.cpp index 99912201..38a8cd31 100644 --- a/Utils.cpp +++ b/Utils.cpp @@ -18,6 +18,7 @@ #include using namespace android; +using namespace android::hardware; using namespace android::hidl::memory::V1_0; namespace armnn_driver @@ -111,7 +112,7 @@ std::string GetOperandSummary(const Operand& operand) toString(operand.type); } -std::string GetModelSummary(const V1_0::Model& model) +std::string GetModelSummary(const neuralnetworks::V1_0::Model& model) { std::stringstream result; @@ -280,9 +281,48 @@ void DumpTensor(const std::string& dumpDir, } } +void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled, + const std::string& dumpDir, + armnn::NetworkId networkId, + const armnn::IProfiler* profiler) +{ + // Check if profiling is required. + if (!gpuProfilingEnabled) + { + return; + } + + // The dump directory must exist in advance. + if (dumpDir.empty()) + { + return; + } + + BOOST_ASSERT(profiler); + + // Set the name of the output profiling file. + const std::string fileName = boost::str(boost::format("%1%/%2%_%3%.json") + % dumpDir + % std::to_string(networkId) + % "profiling"); + + // Open the output file for writing. + std::ofstream fileStream; + fileStream.open(fileName, std::ofstream::out | std::ofstream::trunc); + + if (!fileStream.good()) + { + ALOGW("Could not open file %s for writing", fileName.c_str()); + return; + } + + // Write the profiling info to a JSON file. + profiler->Print(fileStream); +} + void ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork, const std::string& dumpDir, - const V1_0::Model& model) + const neuralnetworks::V1_0::Model& model) { // The dump directory must exist in advance.
if (dumpDir.empty()) @@ -318,4 +358,5 @@ void ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwor ALOGW("An error occurred when writing to file %s", fileName.c_str()); } } + } // namespace armnn_driver diff --git a/Utils.hpp b/Utils.hpp index 4b5066ee..e805f260 100644 --- a/Utils.hpp +++ b/Utils.hpp @@ -5,13 +5,13 @@ #pragma once -#include "HalInterfaces.h" -#include "NeuralNetworks.h" +#include "ArmnnDriver.hpp" + +#include + #include #include -#include "ArmnnDriver.hpp" - #include #include @@ -43,14 +43,20 @@ void* GetMemoryFromPool(DataLocation location, armnn::TensorInfo GetTensorInfoForOperand(const Operand& operand); std::string GetOperandSummary(const Operand& operand); -std::string GetModelSummary(const V1_0::Model& model); +std::string GetModelSummary(const ::android::hardware::neuralnetworks::V1_0::Model& model); void DumpTensor(const std::string& dumpDir, - const std::string& requestName, - const std::string& tensorName, - const armnn::ConstTensor& tensor); + const std::string& requestName, + const std::string& tensorName, + const armnn::ConstTensor& tensor); + +void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled, + const std::string& dumpDir, + armnn::NetworkId networkId, + const armnn::IProfiler* profiler); void ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork, const std::string& dumpDir, - const V1_0::Model& model); + const ::android::hardware::neuralnetworks::V1_0::Model& model); + } diff --git a/android.hardware.neuralnetworks@1.1-service-armnn.rc b/android.hardware.neuralnetworks@1.1-service-armnn.rc new file mode 100644 index 00000000..98efaf9c --- /dev/null +++ b/android.hardware.neuralnetworks@1.1-service-armnn.rc @@ -0,0 +1,4 @@ +service neuralnetworks_hal_service_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.1-service-armnn + class hal + user system + group system diff --git a/service.cpp b/service.cpp index 4ab59c85..c2701bf9 100644 --- a/service.cpp +++ b/service.cpp @@ -11,7 +11,6 @@ #include #include -#include using namespace armnn_driver; using namespace std; diff --git a/test/Android.mk b/test/Android.mk index 97e9a903..1cef3787 100644 --- a/test/Android.mk +++ b/test/Android.mk @@ -12,55 +12,72 @@ NN_HEADER_PATH := $(LOCAL_PATH)/../../../../frameworks/ml/nn/runtime/include ARMNN_HEADER_PATH := $(LOCAL_PATH)/../armnn/include ARMNN_DRIVER_HEADER_PATH := $(LOCAL_PATH)/.. +########################## +# armnn-driver-tests@1.0 # +########################## include $(CLEAR_VARS) -LOCAL_C_INCLUDES := \ - $(OPENCL_HEADER_PATH) \ - $(NN_HEADER_PATH) \ - $(ARMNN_HEADER_PATH) \ - $(ARMNN_DRIVER_HEADER_PATH) +LOCAL_MODULE := armnn-driver-tests@1.0 +LOCAL_MODULE_TAGS := eng optional +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_DRIVER_HEADER_PATH) LOCAL_CFLAGS := \ - -std=c++14 \ - -fexceptions \ - -Werror \ - -UNDEBUG - -LOCAL_SRC_FILES := \ - Tests.cpp \ - UtilsTests.cpp \ - Concurrent.cpp \ - Convolution2D.cpp \ - FullyConnected.cpp \ - GenericLayerTests.cpp \ - DriverTestHelpers.cpp \ - SystemProperties.cpp \ - Merger.cpp \ - TestTensor.cpp + -std=c++14 \ + -fexceptions \ + -Werror \ + -O0 \ + -UNDEBUG +ifeq ($(PLATFORM_VERSION),9) +# Required to build with the changes made to the Android ML framework starting from Android P, +# regardless of the HAL version used for the build. 
+LOCAL_CFLAGS+= \ + -DARMNN_ANDROID_P +endif -LOCAL_STATIC_LIBRARIES := \ - libarmnn-driver \ - libneuralnetworks_common \ - libarmnn \ - libboost_log \ - libboost_system \ - libboost_unit_test_framework \ - libboost_thread \ - armnn-arm_compute - -LOCAL_SHARED_LIBRARIES := \ - libbase \ - libhidlbase \ - libhidltransport \ - libhidlmemory \ - liblog \ - libtextclassifier_hash \ - libutils \ - android.hardware.neuralnetworks@1.0 \ - android.hidl.allocator@1.0 \ - android.hidl.memory@1.0 \ - libOpenCL +LOCAL_SRC_FILES := \ + Tests.cpp \ + UtilsTests.cpp \ + Concurrent.cpp \ + Convolution2D.cpp \ + FullyConnected.cpp \ + GenericLayerTests.cpp \ + DriverTestHelpers.cpp \ + SystemProperties.cpp \ + Lstm.cpp \ + Merger.cpp \ + TestTensor.cpp +LOCAL_STATIC_LIBRARIES := \ + libarmnn-driver@1.0 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_system \ + libboost_unit_test_framework \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL ifeq ($(PLATFORM_VERSION),9) # Required to build the 1.0 version of the NN Driver on Android P and later versions, # as the 1.0 version of the NN API needs the 1.1 HAL headers to be included regardless. @@ -68,18 +85,71 @@ LOCAL_SHARED_LIBRARIES+= \ android.hardware.neuralnetworks@1.1 endif -LOCAL_MODULE := armnn-driver-tests +include $(BUILD_EXECUTABLE) -LOCAL_MODULE_TAGS := eng optional +########################## +# armnn-driver-tests@1.1 # +########################## +include $(CLEAR_VARS) +LOCAL_MODULE := armnn-driver-tests@1.1 +LOCAL_MODULE_TAGS := eng optional LOCAL_ARM_MODE := arm - +LOCAL_PROPRIETARY_MODULE := true # Mark source files as dependent on Android.mk LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk -LOCAL_PROPRIETARY_MODULE := true +LOCAL_C_INCLUDES := \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_DRIVER_HEADER_PATH) -include $(BUILD_EXECUTABLE) +LOCAL_CFLAGS := \ + -std=c++14 \ + -fexceptions \ + -Werror \ + -O0 \ + -UNDEBUG \ + -DARMNN_ANDROID_P \ + -DARMNN_ANDROID_NN_V1_1 + +LOCAL_SRC_FILES := \ + Tests.cpp \ + UtilsTests.cpp \ + Concurrent.cpp \ + Convolution2D.cpp \ + FullyConnected.cpp \ + GenericLayerTests.cpp \ + DriverTestHelpers.cpp \ + SystemProperties.cpp \ + Lstm.cpp \ + Merger.cpp \ + TestTensor.cpp +LOCAL_STATIC_LIBRARIES := \ + libarmnn-driver@1.1 \ + libneuralnetworks_common \ + libarmnn \ + libboost_log \ + libboost_system \ + libboost_unit_test_framework \ + libboost_thread \ + armnn-arm_compute + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libtextclassifier_hash \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hardware.neuralnetworks@1.1 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + libOpenCL +include $(BUILD_EXECUTABLE) diff --git a/test/Concurrent.cpp b/test/Concurrent.cpp index c2d58bde..e4940537 100644 --- a/test/Concurrent.cpp +++ b/test/Concurrent.cpp @@ -11,7 +11,9 @@ BOOST_AUTO_TEST_SUITE(ConcurrentDriverTests) using ArmnnDriver = armnn_driver::ArmnnDriver; using DriverOptions = armnn_driver::DriverOptions; using namespace android::nn; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; // Add our own test for concurrent execution // The main point of this test 
is to check that multiple requests can be @@ -22,7 +24,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute) ALOGI("ConcurrentExecute: entry"); auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // add operands int32_t actValue = 0; @@ -37,7 +39,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute) // make the fully connected operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0, 1, 2, 3}; model.operations[0].outputs = hidl_vec{4}; diff --git a/test/Convolution2D.cpp b/test/Convolution2D.cpp index cc301bc9..fe28aa44 100644 --- a/test/Convolution2D.cpp +++ b/test/Convolution2D.cpp @@ -6,13 +6,13 @@ #include #include -#include "OperationsUtils.h" +#include BOOST_AUTO_TEST_SUITE(Convolution2DTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; namespace { @@ -20,7 +20,7 @@ namespace void PaddingTestImpl(android::nn::PaddingScheme paddingScheme) { auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; uint32_t outSize = paddingScheme == android::nn::kPaddingSame ? 2 : 1; @@ -39,7 +39,7 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme) // make the convolution operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::CONV_2D; + model.operations[0].type = neuralnetworks::V1_0::OperationType::CONV_2D; model.operations[0].inputs = hidl_vec{0, 1, 2, 3, 4, 5, 6}; model.operations[0].outputs = hidl_vec{7}; diff --git a/test/DriverTestHelpers.cpp b/test/DriverTestHelpers.cpp index d2d380a7..82087961 100644 --- a/test/DriverTestHelpers.cpp +++ b/test/DriverTestHelpers.cpp @@ -25,10 +25,12 @@ std::ostream& operator<<(std::ostream& os, ErrorStatus stat) } // namespace android::hardware } // namespace android - namespace driverTestHelpers { +using namespace android::hardware; +using namespace armnn_driver; + Return ExecutionCallback::notify(ErrorStatus status) { (void)status; @@ -107,13 +109,13 @@ void AddPoolAndSetData(uint32_t size, Request& request, const float* data) memcpy(dst, data, size * sizeof(float)); } -void AddOperand(V1_0::Model& model, const Operand& op) +void AddOperand(neuralnetworks::V1_0::Model& model, const Operand& op) { model.operands.resize(model.operands.size() + 1); model.operands[model.operands.size() - 1] = op; } -void AddIntOperand(V1_0::Model& model, int32_t value) +void AddIntOperand(neuralnetworks::V1_0::Model& model, int32_t value) { DataLocation location = {}; location.offset = model.operandValues.size(); @@ -131,10 +133,12 @@ void AddIntOperand(V1_0::Model& model, int32_t value) AddOperand(model, op); } -void AddInputOperand(V1_0::Model& model, hidl_vec dimensions) +void AddInputOperand(neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + neuralnetworks::V1_0::OperandType operandType) { Operand op = {}; - op.type = OperandType::TENSOR_FLOAT32; + op.type = operandType; op.dimensions = dimensions; op.lifetime = OperandLifeTime::MODEL_INPUT; @@ -144,10 +148,13 @@ void AddInputOperand(V1_0::Model& model, hidl_vec dimensions) model.inputIndexes[model.inputIndexes.size() - 1] = model.operands.size() - 1; } -void AddOutputOperand(V1_0::Model& model, 
hidl_vec dimensions) +void AddOutputOperand(neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + neuralnetworks::V1_0::OperandType operandType) { Operand op = {}; - op.type = OperandType::TENSOR_FLOAT32; + op.type = operandType; + op.scale = operandType == neuralnetworks::V1_0::OperandType::TENSOR_QUANT8_ASYMM ? 1.f / 255.f : 0.f; op.dimensions = dimensions; op.lifetime = OperandLifeTime::MODEL_OUTPUT; @@ -158,7 +165,7 @@ void AddOutputOperand(V1_0::Model& model, hidl_vec dimensions) } -android::sp PrepareModelWithStatus(const V1_0::Model& model, +android::sp PrepareModelWithStatus(const neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver, ErrorStatus & prepareStatus, ErrorStatus expectedStatus) @@ -176,7 +183,7 @@ android::sp PrepareModelWithStatus(const V1_0::Model& model, return cb->GetPreparedModel(); } -android::sp PrepareModel(const V1_0::Model& model, +android::sp PrepareModel(const neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver) { ErrorStatus prepareStatus = ErrorStatus::NONE; @@ -187,6 +194,7 @@ ErrorStatus Execute(android::sp preparedModel, const Request& request, ErrorStatus expectedStatus) { + BOOST_TEST(preparedModel.get() != nullptr); android::sp cb(new ExecutionCallback()); ErrorStatus execStatus = preparedModel->execute(request, cb); BOOST_TEST(execStatus == expectedStatus); diff --git a/test/DriverTestHelpers.hpp b/test/DriverTestHelpers.hpp index 57541a35..ccb6b983 100644 --- a/test/DriverTestHelpers.hpp +++ b/test/DriverTestHelpers.hpp @@ -72,9 +72,9 @@ android::sp AddPoolAndGetData(uint32_t size, Request& request); void AddPoolAndSetData(uint32_t size, Request& request, const float* data); -void AddOperand(V1_0::Model& model, const Operand& op); +void AddOperand(::android::hardware::neuralnetworks::V1_0::Model& model, const Operand& op); -void AddIntOperand(V1_0::Model& model, int32_t value); +void AddIntOperand(::android::hardware::neuralnetworks::V1_0::Model& model, int32_t value); template OperandType TypeToOperandType(); @@ -86,7 +86,10 @@ template<> OperandType TypeToOperandType(); template -void AddTensorOperand(V1_0::Model& model, hidl_vec dimensions, T* values) +void AddTensorOperand(::android::hardware::neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + T* values, + OperandType operandType = OperandType::TENSOR_FLOAT32) { uint32_t totalElements = 1; for (uint32_t dim : dimensions) @@ -99,7 +102,7 @@ void AddTensorOperand(V1_0::Model& model, hidl_vec dimensions, T* valu location.length = totalElements * sizeof(T); Operand op = {}; - op.type = TypeToOperandType(); + op.type = operandType; op.dimensions = dimensions; op.lifetime = OperandLifeTime::CONSTANT_COPY; op.location = location; @@ -113,14 +116,18 @@ void AddTensorOperand(V1_0::Model& model, hidl_vec dimensions, T* valu AddOperand(model, op); } -void AddInputOperand(V1_0::Model& model, hidl_vec dimensions); +void AddInputOperand(::android::hardware::neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + ::android::hardware::neuralnetworks::V1_0::OperandType operandType = OperandType::TENSOR_FLOAT32); -void AddOutputOperand(V1_0::Model& model, hidl_vec dimensions); +void AddOutputOperand(::android::hardware::neuralnetworks::V1_0::Model& model, + hidl_vec dimensions, + ::android::hardware::neuralnetworks::V1_0::OperandType operandType = OperandType::TENSOR_FLOAT32); -android::sp PrepareModel(const V1_0::Model& model, +android::sp PrepareModel(const ::android::hardware::neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver); 
-android::sp PrepareModelWithStatus(const V1_0::Model& model, +android::sp PrepareModelWithStatus(const ::android::hardware::neuralnetworks::V1_0::Model& model, armnn_driver::ArmnnDriver& driver, ErrorStatus & prepareStatus, ErrorStatus expectedStatus=ErrorStatus::NONE); diff --git a/test/FullyConnected.cpp b/test/FullyConnected.cpp index 4feda30b..20a350c5 100644 --- a/test/FullyConnected.cpp +++ b/test/FullyConnected.cpp @@ -8,9 +8,9 @@ BOOST_AUTO_TEST_SUITE(FullyConnectedTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; // Add our own test here since we fail the fc tests which Google supplies (because of non-const weights) BOOST_AUTO_TEST_CASE(FullyConnected) @@ -19,7 +19,7 @@ BOOST_AUTO_TEST_CASE(FullyConnected) // but that uses slightly weird dimensions which I don't think we need to support for now auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // add operands int32_t actValue = 0; @@ -34,7 +34,7 @@ BOOST_AUTO_TEST_CASE(FullyConnected) // make the fully connected operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0, 1, 2, 3}; model.operations[0].outputs = hidl_vec{4}; @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput) sup = supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // operands int32_t actValue = 0; @@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput) model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0,1,2,3}; model.operations[0].outputs = hidl_vec{4}; @@ -177,7 +177,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape) sup = supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; // operands int32_t actValue = 0; @@ -200,7 +200,7 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape) model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + model.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model.operations[0].inputs = hidl_vec{0,1,2,3}; model.operations[0].outputs = hidl_vec{4}; diff --git a/test/GenericLayerTests.cpp b/test/GenericLayerTests.cpp index 7116f0b0..aa91ce15 100644 --- a/test/GenericLayerTests.cpp +++ b/test/GenericLayerTests.cpp @@ -8,189 +8,233 @@ BOOST_AUTO_TEST_SUITE(GenericLayerTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; BOOST_AUTO_TEST_CASE(GetSupportedOperations) { auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - ErrorStatus error; - std::vector sup; + ErrorStatus errorStatus; + std::vector supported; - ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector& supported) + auto cb = [&](ErrorStatus _errorStatus, const std::vector& _supported) { - error = status; - sup = supported; + errorStatus = _errorStatus; + supported = _supported; }; - V1_0::Model model1 = {}; + neuralnetworks::V1_0::Model 
model0 = {}; - - // add operands + // Add operands int32_t actValue = 0; float weightValue[] = {2, 4, 1}; float biasValue[] = {4}; - AddInputOperand(model1, hidl_vec{1, 3}); + AddInputOperand (model0, hidl_vec{1, 3}); + AddTensorOperand(model0, hidl_vec{1, 3}, weightValue); + AddTensorOperand(model0, hidl_vec{1}, biasValue); + AddIntOperand (model0, actValue); + AddOutputOperand(model0, hidl_vec{1, 1}); + + model0.operations.resize(1); + + // Make a correct fully connected operation + model0.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; + model0.operations[0].inputs = hidl_vec{0, 1, 2, 3}; + model0.operations[0].outputs = hidl_vec{4}; + + driver->getSupportedOperations(model0, cb); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)1); + BOOST_TEST(supported[0] == true); + + neuralnetworks::V1_0::Model model1 = {}; + + AddInputOperand (model1, hidl_vec{1, 3}); AddTensorOperand(model1, hidl_vec{1, 3}, weightValue); AddTensorOperand(model1, hidl_vec{1}, biasValue); - AddIntOperand(model1, actValue); + AddIntOperand (model1, actValue); AddOutputOperand(model1, hidl_vec{1, 1}); - // make a correct fully connected operation model1.operations.resize(2); - model1.operations[0].type = V1_0::OperationType::FULLY_CONNECTED; + + // Make a correct fully connected operation + model1.operations[0].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; model1.operations[0].inputs = hidl_vec{0, 1, 2, 3}; model1.operations[0].outputs = hidl_vec{4}; - // make an incorrect fully connected operation - AddIntOperand(model1, actValue); + // Add an incorrect fully connected operation + AddIntOperand (model1, actValue); AddOutputOperand(model1, hidl_vec{1, 1}); - model1.operations[1].type = V1_0::OperationType::FULLY_CONNECTED; - model1.operations[1].inputs = hidl_vec{4}; + model1.operations[1].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; + model1.operations[1].inputs = hidl_vec{4}; // Only 1 input operand, expected 4 model1.operations[1].outputs = hidl_vec{5}; driver->getSupportedOperations(model1, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == true); - BOOST_TEST(sup[1] == false); - // Broadcast add/mul are not supported - V1_0::Model model2 = {}; - - AddInputOperand(model2, hidl_vec{1, 1, 3, 4}); - AddInputOperand(model2, hidl_vec{4}); +#if defined(ARMNN_ANDROID_P) + // In Android P, android::nn::validateModel returns INVALID_ARGUMENT, because of the wrong number of inputs for the + // fully connected layer (1 instead of 4) + BOOST_TEST((int)errorStatus == (int)ErrorStatus::INVALID_ARGUMENT); + BOOST_TEST(supported.empty()); +#else + // In Android O, android::nn::validateModel indicates that the second (wrong) fully connected layer is unsupported + // in the vector of flags returned by the callback + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)2); + BOOST_TEST(supported[0] == true); + BOOST_TEST(supported[1] == false); +#endif + + // Test Broadcast on add/mul operators + neuralnetworks::V1_0::Model model2 = {}; + + AddInputOperand (model2, hidl_vec{1, 1, 3, 4}); + AddInputOperand (model2, hidl_vec{4}); + AddIntOperand (model2, actValue); AddOutputOperand(model2, hidl_vec{1, 1, 3, 4}); AddOutputOperand(model2, hidl_vec{1, 1, 3, 4}); model2.operations.resize(2); - model2.operations[0].type = V1_0::OperationType::ADD; - model2.operations[0].inputs = hidl_vec{0,1}; - model2.operations[0].outputs = hidl_vec{2}; + 
model2.operations[0].type = neuralnetworks::V1_0::OperationType::ADD; + model2.operations[0].inputs = hidl_vec{0, 1, 2}; + model2.operations[0].outputs = hidl_vec{3}; - model2.operations[1].type = V1_0::OperationType::MUL; - model2.operations[1].inputs = hidl_vec{0,1}; - model2.operations[1].outputs = hidl_vec{3}; + model2.operations[1].type = neuralnetworks::V1_0::OperationType::MUL; + model2.operations[1].inputs = hidl_vec{0, 1, 2}; + model2.operations[1].outputs = hidl_vec{4}; driver->getSupportedOperations(model2, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == false); - BOOST_TEST(sup[1] == false); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)2); + BOOST_TEST(supported[0] == true); + BOOST_TEST(supported[1] == true); - V1_0::Model model3 = {}; + neuralnetworks::V1_0::Model model3 = {}; - // Add unsupported operation, should return no error but we don't support it - AddInputOperand(model3, hidl_vec{1, 1, 1, 8}); - AddIntOperand(model3, 2); + AddInputOperand (model3, hidl_vec{1, 1, 1, 8}); + AddIntOperand (model3, 2); AddOutputOperand(model3, hidl_vec{1, 2, 2, 2}); + model3.operations.resize(1); - model3.operations[0].type = V1_0::OperationType::DEPTH_TO_SPACE; - model1.operations[0].inputs = hidl_vec{0, 1}; + + // Add unsupported operation, should return no error but we don't support it + model3.operations[0].type = neuralnetworks::V1_0::OperationType::DEPTH_TO_SPACE; + model3.operations[0].inputs = hidl_vec{0, 1}; model3.operations[0].outputs = hidl_vec{2}; driver->getSupportedOperations(model3, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == false); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)1); + BOOST_TEST(supported[0] == false); + + neuralnetworks::V1_0::Model model4 = {}; - // Add invalid operation - V1_0::Model model4 = {}; AddIntOperand(model4, 0); + model4.operations.resize(1); - model4.operations[0].type = static_cast(100); + + // Add invalid operation + model4.operations[0].type = static_cast(100); model4.operations[0].outputs = hidl_vec{0}; driver->getSupportedOperations(model4, cb); - BOOST_TEST((int)error == (int)ErrorStatus::INVALID_ARGUMENT); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::INVALID_ARGUMENT); + BOOST_TEST(supported.empty()); } // The purpose of this test is to ensure that when encountering an unsupported operation -// it is skipped and getSupportedOperations() continues (rather than failing and stopping). -// As per IVGCVSW-710. +// it is skipped and getSupportedOperations() continues (rather than failing and stopping). +// As per IVGCVSW-710. 
BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure) { auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef)); - ErrorStatus error; - std::vector<bool> sup; + ErrorStatus errorStatus; + std::vector<bool> supported; - ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported) + auto cb = [&](ErrorStatus _errorStatus, const std::vector<bool>& _supported) { - error = status; - sup = supported; + errorStatus = _errorStatus; + supported = _supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; - // operands + // Operands int32_t actValue = 0; float weightValue[] = {2, 4, 1}; float biasValue[] = {4}; - // broadcast add is unsupported at the time of writing this test, but any unsupported layer will do - AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}); - AddInputOperand(model, hidl_vec<uint32_t>{4}); + // HASHTABLE_LOOKUP is unsupported at the time of writing this test, but any unsupported layer will do + AddInputOperand (model, hidl_vec<uint32_t>{1, 1, 3, 4}, neuralnetworks::V1_0::OperandType::TENSOR_INT32); + AddInputOperand (model, hidl_vec<uint32_t>{4}, neuralnetworks::V1_0::OperandType::TENSOR_INT32); + AddInputOperand (model, hidl_vec<uint32_t>{1, 1, 3, 4}); AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}); + AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}, neuralnetworks::V1_0::OperandType::TENSOR_QUANT8_ASYMM); - // fully connected - AddInputOperand(model, hidl_vec<uint32_t>{1, 3}); + // Fully connected is supported + AddInputOperand (model, hidl_vec<uint32_t>{1, 3}); AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue); AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue); - AddIntOperand(model, actValue); + AddIntOperand (model, actValue); AddOutputOperand(model, hidl_vec<uint32_t>{1, 1}); - // broadcast mul is unsupported + // EMBEDDING_LOOKUP is unsupported AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4}); model.operations.resize(3); - // unsupported - model.operations[0].type = V1_0::OperationType::ADD; - model.operations[0].inputs = hidl_vec<uint32_t>{0,1}; - model.operations[0].outputs = hidl_vec<uint32_t>{2}; + // Unsupported + model.operations[0].type = neuralnetworks::V1_0::OperationType::HASHTABLE_LOOKUP; + model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2}; + model.operations[0].outputs = hidl_vec<uint32_t>{3, 4}; - // supported - model.operations[1].type = V1_0::OperationType::FULLY_CONNECTED; - model.operations[1].inputs = hidl_vec<uint32_t>{3, 4, 5, 6}; - model.operations[1].outputs = hidl_vec<uint32_t>{7}; + // Supported + model.operations[1].type = neuralnetworks::V1_0::OperationType::FULLY_CONNECTED; + model.operations[1].inputs = hidl_vec<uint32_t>{5, 6, 7, 8}; + model.operations[1].outputs = hidl_vec<uint32_t>{9}; - // unsupported - model.operations[2].type = V1_0::OperationType::MUL; - model.operations[2].inputs = hidl_vec<uint32_t>{0,1}; - model.operations[2].outputs = hidl_vec<uint32_t>{8}; + // Unsupported + model.operations[2].type = neuralnetworks::V1_0::OperationType::EMBEDDING_LOOKUP; + model.operations[2].inputs = hidl_vec<uint32_t>{1, 2}; + model.operations[2].outputs = hidl_vec<uint32_t>{10}; - // we are testing that the unsupported layers return false and the test continues - // rather than failing and stopping. 
+ // We are testing that the unsupported layers return false and the test continues rather than failing and stopping driver->getSupportedOperations(model, cb); - BOOST_TEST((int)error == (int)ErrorStatus::NONE); - BOOST_TEST(sup[0] == false); - BOOST_TEST(sup[1] == true); - BOOST_TEST(sup[2] == false); + BOOST_TEST((int)errorStatus == (int)ErrorStatus::NONE); + BOOST_TEST(supported.size() == (size_t)3); + BOOST_TEST(supported[0] == false); + BOOST_TEST(supported[1] == true); + BOOST_TEST(supported[2] == false); } // The purpose of this test is to ensure that when encountering a failure -// during mem pool mapping we properly report an error to the framework via a callback +// during mem pool mapping we properly report an error to the framework via a callback BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail) { auto driver = std::make_unique<ArmnnDriver>(armnn::Compute::CpuRef); - ErrorStatus error; - std::vector<bool> sup; + ErrorStatus errorStatus; + std::vector<bool> supported; - ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported) + auto cb = [&](ErrorStatus _errorStatus, const std::vector<bool>& _supported) { - error = status; - sup = supported; + errorStatus = _errorStatus; + supported = _supported; }; - V1_0::Model model = {}; + neuralnetworks::V1_0::Model model = {}; model.pools = hidl_vec<hidl_memory>{hidl_memory("Unsupported hidl memory type", nullptr, 0)}; - // Memory pool mapping should fail, we should report an error + // Memory pool mapping should fail, we should report an error driver->getSupportedOperations(model, cb); - BOOST_TEST((int)error == (int)ErrorStatus::GENERAL_FAILURE); + BOOST_TEST((int)errorStatus != (int)ErrorStatus::NONE); + BOOST_TEST(supported.empty()); } BOOST_AUTO_TEST_SUITE_END() diff --git a/test/Lstm.cpp b/test/Lstm.cpp new file mode 100644 index 00000000..1b6ef60d --- /dev/null +++ b/test/Lstm.cpp @@ -0,0 +1,1397 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "DriverTestHelpers.hpp" +#include <boost/test/unit_test.hpp> +#include +#include + +#include "OperationsUtils.h" + +#include <boost/math/special_functions/relative_difference.hpp> + +BOOST_AUTO_TEST_SUITE(LstmTests) + +using ArmnnDriver = armnn_driver::ArmnnDriver; +using DriverOptions = armnn_driver::DriverOptions; +using namespace driverTestHelpers; +using namespace android::hardware; + +namespace +{ + +template <typename T> +RequestArgument CreateRequestArgument(std::vector<T> value, unsigned int poolIndex) +{ + DataLocation inputInloc = {}; + inputInloc.poolIndex = poolIndex; + inputInloc.offset = 0; + inputInloc.length = value.size() * sizeof(T); + RequestArgument inputRequestArgument = {}; + inputRequestArgument.location = inputInloc; + inputRequestArgument.dimensions = hidl_vec<uint32_t>{}; + return inputRequestArgument; +} + +// Returns true if the relative difference between two float values is less than the tolerance value given. +// This is used because the floating point comparison tolerance (set on each BOOST_AUTO_TEST_CASE) does not work! 
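A note on the helper defined just below: the zero special-cases exist because a relative difference is undefined when either operand is exactly zero, so the comparison falls back to an absolute check in those cases. The following standalone sketch is an editorial illustration only, not part of the patch; it spells the relative difference out with std::fabs instead of calling boost::math::relative_difference, whose edge-case handling is more thorough:

    #include <algorithm>
    #include <cassert>
    #include <cmath>

    // Sketch of the comparison rule used by TolerantCompareEqual below:
    // absolute check against zero operands, relative difference otherwise.
    bool TolerantCompareEqualSketch(float a, float b, float tolerance = 0.00001f)
    {
        if (a == 0.0f) { return std::fabs(b) < tolerance; } // relative difference is
        if (b == 0.0f) { return std::fabs(a) < tolerance; } // undefined against zero
        return std::fabs(a - b) / std::min(std::fabs(a), std::fabs(b)) < tolerance;
    }

    int main()
    {
        assert(TolerantCompareEqualSketch(0.0f, 0.0000001f));  // near zero: absolute check
        assert(TolerantCompareEqualSketch(1.0f, 1.0000001f));  // tiny relative difference
        assert(!TolerantCompareEqualSketch(1.0f, 1.1f));       // ~10% apart: not equal
        return 0;
    }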
+bool TolerantCompareEqual(float a, float b, float tolerance = 0.00001f) +{ + float rd; + if (a == 0.0f) + { + rd = fabs(b); + } + else if (b == 0.0f) + { + rd = fabs(a); + } + else + { + rd = boost::math::relative_difference(a, b); + } + return rd < tolerance; +} + +} // namespace + +// Add our own tests here since we fail the lstm tests which Google supplies (because of non-const weights) + +void LstmTestImpl(hidl_vec inputDimensions, + std::vector inputValue, + hidl_vec inputToInputWeightsDimensions, + float* inputToInputWeightsValue, + hidl_vec inputToForgetWeightsDimensions, + float* inputToForgetWeightsValue, + hidl_vec inputToCellWeightsDimensions, + float* inputToCellWeightsValue, + hidl_vec inputToOutputWeightsDimensions, + float* inputToOutputWeightsValue, + hidl_vec recurrentToInputWeightsDimensions, + float* recurrentToInputWeightsValue, + hidl_vec recurrentToForgetWeightsDimensions, + float* recurrentToForgetWeightsValue, + hidl_vec recurrentToCellWeightsDimensions, + float* recurrentToCellWeightsValue, + hidl_vec recurrentToOutputWeightsDimensions, + float* recurrentToOutputWeightsValue, + hidl_vec cellToInputWeightsDimensions, + float* cellToInputWeightsValue, + hidl_vec cellToForgetWeightsDimensions, + float* cellToForgetWeightsValue, + hidl_vec cellToOutputWeightsDimensions, + float* cellToOutputWeightsValue, + hidl_vec inputGateBiasDimensions, + float* inputGateBiasValue, + hidl_vec forgetGateBiasDimensions, + float* forgetGateBiasValue, + hidl_vec cellBiasDimensions, + float* cellBiasValue, + hidl_vec outputGateBiasDimensions, + float* outputGateBiasValue, + hidl_vec projectionWeightsDimensions, + float* projectionWeightsValue, + hidl_vec projectionBiasDimensions, + float* projectionBiasValue, + hidl_vec outputStateInDimensions, + std::vector outputStateInValue, + hidl_vec cellStateInDimensions, + std::vector cellStateInValue, + hidl_vec activationFunctionDimensions, + int32_t* activationFunctionValue, + hidl_vec cellClippingThresholdDimensions, + float* cellClippingThresholdValue, + hidl_vec projectionClippingThresholdDimensions, + float* projectionClippingThresholdValue, + hidl_vec scratchBufferDimensions, + std::vector scratchBufferValue, + hidl_vec outputStateOutDimensions, + std::vector outputStateOutValue, + hidl_vec cellStateOutDimensions, + std::vector cellStateOutValue, + hidl_vec outputDimensions, + std::vector outputValue) +{ + auto driver = std::make_unique(DriverOptions(armnn::Compute::GpuAcc)); + neuralnetworks::V1_0::Model model = {}; + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + AddInputOperand(model, inputDimensions); + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + AddTensorOperand(model, inputToInputWeightsDimensions, inputToInputWeightsValue); + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + AddTensorOperand(model, inputToForgetWeightsDimensions, inputToForgetWeightsValue); + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. 
+ AddTensorOperand(model, inputToCellWeightsDimensions, inputToCellWeightsValue); + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + AddTensorOperand(model, inputToOutputWeightsDimensions, inputToOutputWeightsValue); + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + AddTensorOperand(model, recurrentToInputWeightsDimensions, recurrentToInputWeightsValue); + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + AddTensorOperand(model, recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue); + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + AddTensorOperand(model, recurrentToCellWeightsDimensions, recurrentToCellWeightsValue); + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + AddTensorOperand(model, recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue); + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellToInputWeightsDimensions, cellToInputWeightsValue); + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellToForgetWeightsDimensions, cellToForgetWeightsValue); + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellToOutputWeightsDimensions, cellToOutputWeightsValue); + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, inputGateBiasDimensions, inputGateBiasValue); + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, forgetGateBiasDimensions, forgetGateBiasValue); + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, cellBiasDimensions, cellBiasValue); + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + AddTensorOperand(model, outputGateBiasDimensions, outputGateBiasValue); + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + AddTensorOperand(model, projectionWeightsDimensions, projectionWeightsValue); + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + AddTensorOperand(model, projectionBiasDimensions, projectionBiasValue); + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + AddInputOperand(model, outputStateInDimensions); + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. 
+ AddInputOperand(model, cellStateInDimensions); + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + AddTensorOperand(model, activationFunctionDimensions, + activationFunctionValue, OperandType::INT32); + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + AddTensorOperand(model, cellClippingThresholdDimensions, + cellClippingThresholdValue, OperandType::FLOAT32); + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + AddTensorOperand(model, projectionClippingThresholdDimensions, + projectionClippingThresholdValue, OperandType::FLOAT32); + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + AddOutputOperand(model, scratchBufferDimensions); + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + AddOutputOperand(model, outputStateOutDimensions); + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + AddOutputOperand(model, cellStateOutDimensions); + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + AddOutputOperand(model, outputDimensions); + + // make the lstm operation + model.operations.resize(1); + model.operations[0].type = neuralnetworks::V1_0::OperationType::LSTM; + model.operations[0].inputs = + hidl_vec {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22}; + model.operations[0].outputs = hidl_vec {23, 24, 25, 26}; + + // define the input values + hidl_vec inputArguments; + inputArguments.resize(3); + + inputArguments[0] = CreateRequestArgument(inputValue, 0); + inputArguments[1] = CreateRequestArgument(outputStateInValue, 1); + inputArguments[2] = CreateRequestArgument(cellStateInValue, 2); + + // define the expected output values + hidl_vec outputArguments; + outputArguments.resize(4); + + outputArguments[0] = CreateRequestArgument(scratchBufferValue, 3); + outputArguments[1] = CreateRequestArgument(outputStateOutValue, 4); + outputArguments[2] = CreateRequestArgument(cellStateOutValue, 5); + outputArguments[3] = CreateRequestArgument(outputValue, 6); + + Request request = {}; + request.inputs = inputArguments; + request.outputs = outputArguments; + + // set the input data + AddPoolAndSetData(inputValue.size(), request, inputValue.data()); + AddPoolAndSetData(outputStateInValue.size(), request, outputStateInValue.data()); + AddPoolAndSetData(cellStateInValue.size(), request, cellStateInValue.data()); + + // add memory for the outputs + AddPoolAndGetData(scratchBufferValue.size(), request); + android::sp outputStateOutMemory = AddPoolAndGetData(outputStateOutValue.size(), request); + float* outputStateOutData = static_cast(static_cast(outputStateOutMemory->getPointer())); + android::sp cellStateOutMemory = AddPoolAndGetData(cellStateOutValue.size(), request); + float* cellStateOutData = static_cast(static_cast(cellStateOutMemory->getPointer())); + android::sp outputMemory = 
AddPoolAndGetData(outputValue.size(), request); + float* outputData = static_cast(static_cast(outputMemory->getPointer())); + + // make the prepared model and run the execution + android::sp preparedModel = PrepareModel(model, *driver); + if (preparedModel.get() != nullptr) + { + Execute(preparedModel, request); + } + + // check the results + for (size_t i = 0; i < outputStateOutValue.size(); ++i) + { + BOOST_TEST(TolerantCompareEqual(outputStateOutValue[i], outputStateOutData[i]), + "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != " << outputStateOutData[i]); + } + for (size_t i = 0; i < cellStateOutValue.size(); ++i) + { + BOOST_TEST(TolerantCompareEqual(cellStateOutValue[i], cellStateOutData[i]), + "cellStateOut[" << i << "]: " << cellStateOutValue[i] << " != " << cellStateOutData[i]); + } + for (size_t i = 0; i < outputValue.size(); ++i) + { + BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i]), + "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]); + } +} + +BOOST_AUTO_TEST_CASE(LstmNoCifgNoPeepholeNoProjection) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm.model.cpp + // with values from android/frameworks/ml/nn/runtime/test/generated/examples/lstm.example.cpp + // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors). + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + hidl_vec inputDimensions({1, 2}); + std::vector inputValue {2.0f, 3.0f}; + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + hidl_vec inputToInputWeightsDimensions({4, 2}); + float inputToInputWeightsValue[] = {-0.45018822f, -0.02338299f, + -0.08705890f, -0.34550029f, + 0.04266912f, -0.15680569f, + -0.34856534f, 0.43890524f}; + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToForgetWeightsDimensions({4, 2}); + float inputToForgetWeightsValue[] = { 0.09701663f, 0.20334584f, + -0.50592935f, -0.31343272f, + -0.40032279f, 0.44781327f, + 0.01387155f, -0.35593212f}; + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. + hidl_vec inputToCellWeightsDimensions({4, 2}); + float inputToCellWeightsValue[] = {-0.50013041f, 0.13702840f, + 0.11810488f, 0.20131630f, + -0.20583314f, 0.44344562f, + 0.22077113f, -0.29909778f}; + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToOutputWeightsDimensions({4, 2}); + float inputToOutputWeightsValue[] = {-0.25065863f, -0.28290087f, + 0.04613829f, 0.40525138f, + 0.44272184f, 0.03897077f, + -0.15568960f, 0.19487578f}; + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. 
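One convention worth keeping in mind for the operand walkthroughs here and below: an optional LSTM operand that a test omits is still added at its fixed index, but as a tensor whose dimension list is {0} and with no data behind it. A fragment in the style of the test code (the identifiers mirror declarations that appear later in this test; hidl_vec<uint32_t> is the dimension type the helpers take):

    // How an absent optional operand is encoded in these tests:
    hidl_vec<uint32_t> cellToInputWeightsDimensions({0}); // dimension {0} == not provided
    float cellToInputWeightsValue[] = {};                 // no backing data
    AddTensorOperand(model, cellToInputWeightsDimensions, cellToInputWeightsValue);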
+ hidl_vec recurrentToInputWeightsDimensions({4, 4}); + float recurrentToInputWeightsValue[] = {-0.00635350f, -0.20423880f, 0.31454784f, -0.35746509f, + 0.28902304f, 0.08183324f, -0.16555229f, 0.02286911f, + -0.13566875f, 0.03034258f, 0.48091322f, -0.12528998f, + 0.24077177f, -0.51332325f, -0.33502164f, 0.10629296f}; + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToForgetWeightsDimensions({4, 4}); + float recurrentToForgetWeightsValue[] = {-0.48684245f, -0.06655136f, 0.42224967f, 0.21126390f, + 0.27654213f, 0.20864892f, -0.07646349f, 0.45877004f, + 0.00141793f, -0.14609534f, 0.36447752f, 0.09196436f, + 0.28053468f, 0.01560611f, -0.20127171f, -0.01140004f}; + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToCellWeightsDimensions({4, 4}); + float recurrentToCellWeightsValue[] = {-0.34074140f, 0.24443203f, -0.20785320f, 0.26320225f, + 0.05695659f, -0.00123841f, -0.47447860f, -0.35869038f, + -0.06418842f, -0.13502428f, -0.50176400f, 0.22830659f, + -0.46367589f, 0.26016325f, -0.03894562f, -0.16368064f}; + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToOutputWeightsDimensions({4, 4}); + float recurrentToOutputWeightsValue[] = { 0.43385774f, -0.17194885f, 0.27182370f, 0.09215671f, + 0.24107647f, -0.39835793f, 0.18212086f, 0.01301402f, + 0.48572797f, -0.50656658f, 0.20047462f, -0.20607421f, + -0.51818722f, -0.15390486f, 0.04681480f, 0.39922136f}; + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToInputWeightsDimensions({0}); + float cellToInputWeightsValue[] = {}; + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToForgetWeightsDimensions({0}); + float cellToForgetWeightsValue[] = {}; + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToOutputWeightsDimensions({0}); + float cellToOutputWeightsValue[] = {}; + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec inputGateBiasDimensions({4}); + float inputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec forgetGateBiasDimensions({4}); + float forgetGateBiasValue[] = {1.0f, 1.0f, 1.0f, 1.0f}; + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellBiasDimensions({4}); + float cellBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec outputGateBiasDimensions({4}); + float outputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + hidl_vec projectionWeightsDimensions({0}); + float projectionWeightsValue[] = {}; + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. 
+ hidl_vec projectionBiasDimensions({0}); + float projectionBiasValue[] = {}; + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateInDimensions({1, 4}); + std::vector outputStateInValue {0, 0, 0, 0}; + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateInDimensions({1, 4}); + std::vector cellStateInValue {0, 0, 0, 0}; + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + hidl_vec activationFunctionDimensions({}); + int32_t activationFunctionValue[] = {4}; + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + hidl_vec cellClippingThresholdDimensions({}); + float cellClippingThresholdValue[] = {0.0f}; + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({1, 12}); + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateOutDimensions({1, 4}); + std::vector outputStateOutValue {-0.0297319f, 0.122947f, 0.208851f, -0.153588f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({1, 4}); + std::vector cellStateOutValue {-0.145439f, 0.157475f, 0.293663f, -0.277353f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. 
+ hidl_vec outputDimensions({1, 4}); + std::vector outputValue {-0.02973187f, 0.1229473f, 0.20885126f, -0.15358765f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_CASE(LstmCifgPeepholeNoProjection) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm2.model.cpp + // with values from android/frameworks/ml/nn/runtime/test/generated/examples/lstm2.example.cpp + // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors). + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + hidl_vec inputDimensions({1, 2}); + std::vector inputValue {2.0f, 3.0f}; + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. + hidl_vec inputToInputWeightsDimensions({0}); + float inputToInputWeightsValue[] = {}; + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToForgetWeightsDimensions({4, 2}); + float inputToForgetWeightsValue[] = {-0.55291498f, -0.42866567f, + 0.13056988f, -0.36333650f, + -0.22755712f, 0.28253698f, + 0.24407166f, 0.33826375f}; + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. + hidl_vec inputToCellWeightsDimensions({4, 2}); + float inputToCellWeightsValue[] = {-0.49770179f, -0.27711356f, + -0.09624726f, 0.05100781f, + 0.04717243f, 0.48944736f, + -0.38535351f, -0.17212132f}; + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. 
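For orientation: "CIFG" is the coupled input and forget gate variant, in which the input gate is derived from the forget gate rather than learned separately, so the CIFG test here omits the input-to-input weights, the recurrent-to-input weights, and the input gate bias, while the peephole connections contribute the cell-to-forget and cell-to-output weight vectors. A sketch of the resulting operand pattern, in the style of the test code (num_units is 4 in this test):

    // CIFG + peephole operand pattern exercised by this test:
    hidl_vec<uint32_t> inputToInputWeightsDimensions({0});     // CIFG: omitted
    hidl_vec<uint32_t> recurrentToInputWeightsDimensions({0}); // CIFG: omitted
    hidl_vec<uint32_t> inputGateBiasDimensions({0});           // CIFG: omitted
    hidl_vec<uint32_t> cellToForgetWeightsDimensions({4});     // peephole: [num_units]
    hidl_vec<uint32_t> cellToOutputWeightsDimensions({4});     // peephole: [num_units]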
+ hidl_vec inputToOutputWeightsDimensions({4, 2}); + float inputToOutputWeightsValue[] = { 0.10725588f, -0.02335852f, + -0.55932593f, -0.09426838f, + -0.44257352f, 0.54939759f, + 0.01533556f, 0.42751634f}; + // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + hidl_vec recurrentToInputWeightsDimensions({0}); // VTS was {4, 4} -> {0} ? + float recurrentToInputWeightsValue[] = {}; + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToForgetWeightsDimensions({4, 4}); + float recurrentToForgetWeightsValue[] = {-0.13832897f, -0.05151010f, -0.23590070f, -0.16661474f, + -0.14340827f, 0.36986142f, 0.23414481f, 0.55899000f, + 0.10798943f, -0.41174671f, 0.17751795f, -0.34484994f, + -0.35874045f, -0.11352962f, 0.27268326f, 0.54058349f}; + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToCellWeightsDimensions({4, 4}); + float recurrentToCellWeightsValue[] = { 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f, + 0.42957711f, 0.01841056f, -0.32764608f, -0.33027974f, + -0.10826075f, 0.20675004f, 0.19069612f, -0.03026325f, + -0.54532051f, 0.33003211f, 0.44901288f, 0.21193194f}; + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToOutputWeightsDimensions({4, 4}); + float recurrentToOutputWeightsValue[] = { 0.41613156f, 0.42610586f, -0.16495961f, -0.56638730f, + 0.30579174f, -0.05115908f, -0.33941799f, 0.23364776f, + 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f, + 0.50248802f, 0.26114327f, -0.43736315f, 0.33149987f}; + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToInputWeightsDimensions({0}); + float cellToInputWeightsValue[] = {}; + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToForgetWeightsDimensions({4}); + float cellToForgetWeightsValue[] = {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f}; + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToOutputWeightsDimensions({4}); + float cellToOutputWeightsValue[] = {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f}; + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec inputGateBiasDimensions({0}); // VTS was {4} -> {0} ? + float inputGateBiasValue[] = {}; + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec forgetGateBiasDimensions({4}); + float forgetGateBiasValue[] = {1.0f, 1.0f, 1.0f, 1.0f}; + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellBiasDimensions({4}); + float cellBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec outputGateBiasDimensions({4}); + float outputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f}; + // 16: The projection weights: Optional. 
A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. + hidl_vec projectionWeightsDimensions({0}); + float projectionWeightsValue[] = {}; + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + hidl_vec projectionBiasDimensions({0}); + float projectionBiasValue[] = {}; + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateInDimensions({1, 4}); + std::vector outputStateInValue {0, 0, 0, 0}; + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateInDimensions({1, 4}); + std::vector cellStateInValue {0, 0, 0, 0}; + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + hidl_vec activationFunctionDimensions({}); + int32_t activationFunctionValue[] = {4}; + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + hidl_vec cellClippingThresholdDimensions({}); + float cellClippingThresholdValue[] = {0.0f}; + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({1, 16}); // VTS was {1, 12} -> {1, 16} + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateOutDimensions({1, 4}); + std::vector outputStateOutValue {-0.364445f, -0.00352185f, 0.128866f, -0.0516365f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({1, 4}); + std::vector cellStateOutValue {-0.760444f, -0.0180416f, 0.182264f, -0.0649371f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. 
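A quick worked check of the scratch buffer sizing above: with batch_size = 1 and num_units = 4, the CIFG case allocates num_units * 4 = 16 elements ({1, 16}, hence the "VTS was {1, 12} -> {1, 16}" remark), while the non-CIFG test earlier in this file used num_units * 3 = 12 elements ({1, 12}). The same arithmetic as a compile-time sketch (editorial, not part of the patch):

    #include <cstdint>

    // Scratch buffer element counts for the two LSTM variants in this file.
    constexpr uint32_t batchSize = 1;
    constexpr uint32_t numUnits  = 4;
    static_assert(batchSize * numUnits * 4 == 16, "CIFG test: scratchBufferDimensions({1, 16})");
    static_assert(batchSize * numUnits * 3 == 12, "non-CIFG test: scratchBufferDimensions({1, 12})");

    int main() { return 0; }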
+ hidl_vec outputDimensions({1, 4}); + std::vector outputValue {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_CASE(LstmNoCifgPeepholeProjection) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm3.model.cpp + // with values from android/frameworks/ml/nn/runtime/test/generated/examples/lstm3.example.cpp + // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors). + + // Inputs: + // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where + // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input. + hidl_vec inputDimensions({2, 5}); + std::vector inputValue {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f, + 0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}; + + // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size], where “num_units” corresponds to the number of cell units. 
+ hidl_vec inputToInputWeightsDimensions({20, 5}); + float inputToInputWeightsValue[] = { 0.0213936830f, 0.0612455100f, 0.0469051670f, -0.0146576770f, -0.0314946300f, + 0.0917180300f, 0.1464780100f, 0.1079719300f, -0.0057968358f, 0.0019193048f, + -0.2726754000f, 0.1015402900f, -0.0185398850f, 0.0803498850f, -0.1026238500f, + -0.0225997870f, -0.0912115500f, -0.0086759670f, -0.0452061030f, -0.0821282000f, + -0.0080459520f, 0.0154780810f, 0.0552172470f, 0.0387195870f, 0.0441536270f, + -0.0645324300f, 0.0503182500f, -0.0469351080f, -0.0081644309f, 0.0145742260f, + -0.1671009000f, -0.1551955200f, -0.1681979700f, -0.1397126900f, -0.1195305900f, + 0.2500548700f, -0.2279098300f, 0.0098550870f, -0.0281409580f, -0.1120069800f, + 0.1129540800f, -0.0035217577f, 0.0544850750f, 0.0518469500f, 0.0647112060f, + 0.1098919300f, 0.1167478600f, 0.0349060700f, 0.0772735700f, 0.1139058500f, + -0.1863375000f, -0.1034451000f, -0.1394518900f, -0.0494012270f, -0.1876706300f, + 0.0424839030f, 0.1423355200f, 0.1383258100f, 0.1835016500f, 0.1454560300f, + -0.0285457040f, 0.0249395310f, 0.0509297180f, 0.0076203286f, -0.0029723682f, + -0.0424842240f, -0.1182759600f, -0.0917110400f, -0.1080862800f, -0.1632798800f, + -0.2273378000f, -0.0993647000f, -0.0171551070f, 0.0023917493f, 0.0492727640f, + 0.0038534778f, 0.0547645050f, 0.0897537840f, 0.0694723400f, 0.0801447600f, + -0.0454423400f, -0.0497073000f, -0.0713563100f, -0.0489291060f, -0.0040420120f, + -0.0092840260f, 0.0180420540f, 0.0036860977f, -0.0742730200f, -0.1143460400f, + -0.0189954560f, 0.0314875430f, 0.0128349080f, 0.0199777540f, 0.0442566540f, + -0.3929261300f, -0.1851933400f, -0.1165128100f, -0.0680989200f, 0.0113736770f}; + // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. 
+ hidl_vec inputToForgetWeightsDimensions({20, 5}); + float inputToForgetWeightsValue[] = {-0.0018401089f, -0.0048522370f, 0.0369842400f, 0.0141817040f, 0.0282732360f, + -0.0167261940f, -0.0524975900f, -0.1020426100f, 0.0086106600f, -0.0409795050f, + -0.0098991870f, 0.0192389200f, -0.0281772690f, -0.0853510300f, -0.1458549500f, + 0.1066256700f, -0.0190973100f, -0.0178835340f, -0.0047269356f, -0.0451033230f, + 0.0030784295f, 0.0767847750f, 0.0746369600f, 0.0945313950f, 0.0814421000f, + -0.1225789900f, -0.0339457580f, -0.0313034650f, 0.0456306260f, 0.0684388700f, + -0.1349294500f, -0.0124800070f, -0.0811829000f, -0.0722449900f, -0.0962879100f, + 0.0451009460f, 0.0012300825f, 0.0139646620f, 0.0993723940f, 0.0254305900f, + 0.0695832400f, 0.0342572960f, 0.0482646000f, 0.0626799700f, 0.0526250680f, + 0.1278466600f, 0.0707789700f, 0.0257259350f, 0.0416500900f, 0.0724190500f, + 0.0186686440f, -0.0373772940f, -0.0627778300f, -0.0883363600f, -0.0401206050f, + -0.0114055860f, -0.0078083350f, -0.0103013860f, -0.0051021670f, 0.0277174640f, + 0.0548342300f, 0.1144911100f, 0.1128965200f, 0.1093983900f, 0.1339650600f, + -0.0840216600f, -0.0190146200f, -0.0446783040f, -0.0772056500f, 0.0143500630f, + -0.1175795800f, -0.0652038000f, -0.0818573300f, -0.0767543240f, -0.0926143750f, + 0.1040549100f, 0.0529603360f, 0.0357558950f, 0.0358393860f, -0.0125405530f, + 0.0368812980f, 0.0291337600f, 0.0342015900f, 0.0544844700f, -0.0545233530f, + 0.0258271500f, 0.0232735500f, -0.0118571790f, -0.0011980024f, -0.0346417170f, + -0.0261250940f, -0.1758261500f, -0.1592365700f, -0.2748677400f, -0.0006143371f, + 0.0001771948f, -8.470171e-05f, 0.0265180700f, 0.0457907650f, 0.069564960f}; + // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size]. 
+ hidl_vec inputToCellWeightsDimensions({20, 5}); + float inputToCellWeightsValue[] = {-0.0458028300f, -0.0954946200f, -0.0324189850f, -0.0645463300f, -0.0435284530f, + 0.0430185870f, -0.0491523440f, -0.1241814400f, -0.0789854750f, -0.0759688900f, + 0.0194843620f, -0.1143496200f, -0.0074034138f, -0.0631484400f, -0.0929814950f, + 0.0062155537f, -0.0250343380f, -0.0028890965f, 0.0489295270f, 0.0623507500f, + 0.1066591800f, -0.0320367920f, -0.0850591600f, -0.1084335800f, -0.1300243300f, + -0.0368164370f, -0.0213013400f, -0.0165182390f, 0.0047691227f, -0.0025825808f, + 0.0660178660f, 0.0299915340f, -0.1065283600f, -0.1037554000f, -0.1305607100f, + -0.0326664300f, -0.0337024140f, -0.0064734240f, -0.0461169200f, 0.0144193390f, + -0.0251743230f, 0.0396852000f, 0.0817775060f, 0.0615746800f, 0.1021009500f, + -0.0096581940f, 0.0465117170f, 0.0360390600f, 0.0069369148f, 0.0159600950f, + -0.0650766600f, 0.0955159800f, 0.0535688360f, 0.0640871400f, 0.1283566700f, + -0.0087143290f, -0.2021196600f, -0.1209367400f, 0.0294504720f, 0.2849013000f, + -0.0292279010f, 0.1164364000f, -0.0856026300f, 0.0994178600f, -0.0369995650f, + -0.0288426260f, -0.0033637602f, -0.0170129020f, -0.0972086500f, -0.1119335100f, + -0.0291551170f, -0.0179360340f, -0.0097689360f, -0.0422332400f, -0.0361596350f, + 0.0650511200f, -0.0217428920f, -0.0233772120f, -0.0722136400f, -0.0643055200f, + 0.0545386500f, 0.0911498140f, 0.0638733100f, 0.0075183930f, 0.0559609530f, + 0.0697793440f, 0.0464111680f, 0.1050991100f, 0.0746389400f, 0.0075130584f, + 0.0128509820f, 0.0455543100f, 0.0569556880f, 0.0655528500f, 0.0508014560f, + -0.0098626830f, 0.0082677200f, -0.0265556090f, -0.0073611983f, -0.0014897042f}; + // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, input_size]. + hidl_vec inputToOutputWeightsDimensions({20, 5}); + float inputToOutputWeightsValue[] = {-0.0998932000f, -0.0720195600f, -0.0528037730f, -0.1562959300f, -0.1500191800f, + -0.0765075100f, 0.0235985500f, -0.0751553550f, -0.0803770900f, -0.1509353400f, + 0.0295175520f, -0.0475139300f, 0.0103505310f, -0.0266485100f, -0.0168397220f, + -0.0231211630f, 0.0077019283f, 0.0128512570f, -0.0504064900f, -0.0129761000f, + -0.0217377470f, -0.0383057930f, -0.0687058600f, -0.0148124700f, -0.0012853940f, + 0.1012423600f, 0.0831228350f, 0.0533130060f, -0.0622356460f, -0.0756371540f, + -0.0278339030f, 0.0297749710f, 0.1130802000f, 0.0921890600f, 0.0950613500f, + -0.0866657640f, -0.0371627060f, -0.0388809140f, -0.0358328450f, -0.0144815640f, + -0.0982500300f, -0.1204856900f, -0.0976655860f, -0.0528763300f, -0.0964047000f, + -0.1136642900f, 0.0357775050f, 0.1356881900f, 0.0524513830f, 0.0506493040f, + 0.0579895100f, -0.0218523350f, -0.0998488440f, 0.0147404750f, -0.0788979460f, + 0.0497469900f, 0.0141604730f, 0.0697393200f, 0.0496494200f, 0.0333646460f, + 0.0819012400f, 0.0255353670f, 0.0508931650f, 0.0485142540f, 0.0694581300f, + -0.0789075640f, -0.0670761600f, -0.1184450800f, -0.0998668800f, -0.0750940300f, + 0.0626322600f, 0.1492558700f, 0.2018843600f, 0.1209845100f, 0.1463941500f, + 0.0015017595f, -0.0142673820f, -0.0341725700f, 0.0127114680f, 0.0028300495f, + -0.0247584820f, -0.0509854800f, -0.0821182000f, 0.0142256720f, 0.0215441580f, + 0.0894972500f, 0.0750526800f, -0.0020780868f, 0.0490825800f, 0.0647629500f, + -0.0229070630f, 0.0275624560f, 0.0401857350f, 0.0195675770f, -0.0155987390f, + -0.0490973030f, -0.0171218660f, -0.0833682340f, -0.0233200200f, -0.084095600f}; + // 05: The recurrent-to-input weights: Optional. 
A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e., + // “num_units”), or the second dimension of the “projection_weights”, if defined. + hidl_vec recurrentToInputWeightsDimensions({20, 16}); + float recurrentToInputWeightsValue[] = { + -0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f, // 00 + -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f, + -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f, + -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f, + 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f, // 01 + 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f, + -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f, + 0.14283475f, -0.07390571f, -0.06402044f, 0.062524505f, + -0.093129106f, 0.04860203f, -0.08364217f, -0.08119002f, // 02 + 0.009352075f, 0.22920375f, 0.0016303885f, 0.11583097f, + -0.13732095f, 0.012405723f, -0.07551853f, 0.06343048f, + 0.12162708f, -0.031923793f, -0.014335606f, 0.01790974f, + -0.10650317f, -0.0724401f, 0.08554849f, -0.05727212f, // 03 + 0.06556731f, -0.042729504f, -0.043227166f, 0.011683251f, + -0.013082158f, -0.029302018f, -0.010899579f, -0.062036745f, + -0.022509435f, -0.00964907f, -0.01567329f, 0.04260106f, + -0.07787477f, -0.11576462f, 0.017356863f, 0.048673786f, // 04 + -0.017577527f, -0.05527947f, -0.082487635f, -0.040137455f, + -0.10820036f, -0.04666372f, 0.022746278f, -0.07851417f, + 0.01068115f, 0.032956902f, 0.022433773f, 0.0026891115f, + 0.08944216f, -0.0685835f, 0.010513544f, 0.07228705f, // 05 + 0.02032331f, -0.059686817f, -0.0005566496f, -0.086984694f, + 0.040414046f, -0.1380399f, 0.094208956f, -0.05722982f, + 0.012092817f, -0.04989123f, -0.086576f, -0.003399834f, + -0.04696032f, -0.045747425f, 0.10091314f, 0.048676282f, // 06 + -0.029037097f, 0.031399418f, -0.0040285117f, 0.047237843f, + 0.09504992f, 0.041799378f, -0.049185462f, -0.031518843f, + -0.10516937f, 0.026374253f, 0.10058866f, -0.0033195973f, + -0.041975245f, 0.0073591834f, 0.0033782164f, -0.004325073f, // 07 + -0.10167381f, 0.042500053f, -0.01447153f, 0.06464186f, + -0.017142897f, 0.03312627f, 0.009205989f, 0.024138335f, + -0.011337001f, 0.035530265f, -0.010912711f, 0.0706555f, + -0.005894094f, 0.051841937f, -0.1401738f, -0.02351249f, // 08 + 0.0365468f, 0.07590991f, 0.08838724f, 0.021681072f, + -0.10086113f, 0.019608743f, -0.06195883f, 0.077335775f, + 0.023646897f, -0.095322326f, 0.02233014f, 0.09756986f, + -0.048691444f, -0.009579111f, 0.07595467f, 0.11480546f, // 09 + -0.09801813f, 0.019894179f, 0.08502348f, 0.004032281f, + 0.037211012f, 0.068537936f, -0.048005626f, -0.091520436f, + -0.028379958f, -0.01556313f, 0.06554592f, -0.045599163f, + -0.01672207f, -0.020169014f, -0.011877351f, -0.20212261f, // 10 + 0.010889619f, 0.0047078193f, 0.038385306f, 0.08540671f, + -0.017140968f, -0.0035865551f, 0.016678626f, 0.005633034f, + 0.015963363f, 0.00871737f, 0.060130805f, 0.028611384f, + 0.10109069f, -0.015060172f, -0.07894427f, 0.06401885f, // 11 + 0.011584063f, -0.024466386f, 0.0047652307f, -0.09041358f, + 0.030737216f, -0.0046374933f, 0.14215417f, -0.11823516f, + 0.019899689f, 0.006106124f, -0.027092824f, 0.0786356f, + 0.05052217f, -0.058925f, -0.011402121f, -0.024987547f, // 12 + -0.0013661642f, -0.06832946f, -0.015667673f, -0.1083353f, + -0.00096863037f, -0.06988685f, -0.053350925f, -0.027275559f, + -0.033664223f, -0.07978348f, -0.025200296f, -0.017207067f, + -0.058403496f, -0.055697463f, 0.005798788f, 0.12965427f, // 13 + -0.062582195f, 
0.0013350133f, -0.10482091f, 0.0379771f, + 0.072521195f, -0.0029455067f, -0.13797039f, -0.03628521f, + 0.013806405f, -0.017858358f, -0.01008298f, -0.07700066f, + -0.017081132f, 0.019358726f, 0.0027079724f, 0.004635139f, // 14 + 0.062634714f, -0.02338735f, -0.039547626f, -0.02050681f, + 0.03385117f, -0.083611414f, 0.002862572f, -0.09421313f, + 0.058618143f, -0.08598433f, 0.00972939f, 0.023867095f, + -0.053934585f, -0.023203006f, 0.07452513f, -0.048767887f, // 15 + -0.07314807f, -0.056307215f, -0.10433547f, -0.06440842f, + 0.04328182f, 0.04389765f, -0.020006588f, -0.09076438f, + -0.11652589f, -0.021705797f, 0.03345259f, -0.010329105f, + -0.025767034f, 0.013057034f, -0.07316461f, -0.10145612f, // 16 + 0.06358255f, 0.18531723f, 0.07759293f, 0.12006465f, + 0.1305557f, 0.058638252f, -0.03393652f, 0.09622831f, + -0.16253184f, -2.4580743e-06f, 0.079869635f, -0.070196845f, + -0.005644518f, 0.06857898f, -0.12598175f, -0.035084512f, // 17 + 0.03156317f, -0.12794146f, -0.031963028f, 0.04692781f, + 0.030070418f, 0.0071660685f, -0.095516115f, -0.004643372f, + 0.040170413f, -0.062104587f, -0.0037324072f, 0.0554317f, + 0.08184801f, -0.019164372f, 0.06791302f, 0.034257166f, // 18 + -0.10307039f, 0.021943003f, 0.046745934f, 0.0790918f, + -0.0265588f, -0.007824208f, 0.042546265f, -0.00977924f, + -0.0002440307f, -0.017384544f, -0.017990116f, 0.12252321f, + -0.014512694f, -0.08251313f, 0.08861942f, 0.13589665f, // 19 + 0.026351685f, 0.012641483f, 0.07466548f, 0.044301085f, + -0.045414884f, -0.051112458f, 0.03444247f, -0.08502782f, + -0.04106223f, -0.028126027f, 0.028473156f, 0.10467447f}; + // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToForgetWeightsDimensions({20, 16}); + float recurrentToForgetWeightsValue[] = { + -0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f, // 00 + 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f, + -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f, + 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f, + 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f, // 01 + -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f, + -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f, + 0.061878487f, -0.04729229f, 0.034919553f, -0.07585433f, + -0.04421272f, -0.044019096f, 0.085488975f, 0.04058006f, // 02 + -0.06890133f, -0.030951202f, -0.024628663f, -0.07672815f, + 0.034293607f, 0.08556707f, -0.05293577f, -0.033561368f, + -0.04899627f, 0.0241671f, 0.015736353f, -0.095442444f, + -0.029564252f, 0.016493602f, -0.035026584f, 0.022337519f, // 03 + -0.026871363f, 0.004780428f, 0.0077918363f, -0.03601621f, + 0.016435321f, -0.03263031f, -0.09543275f, -0.047392778f, + 0.013454138f, 0.028934088f, 0.01685226f, -0.086110644f, + -0.046250615f, -0.01847454f, 0.047608484f, 0.07339695f, // 04 + 0.034546845f, -0.04881143f, 0.009128804f, -0.08802852f, + 0.03761666f, 0.008096139f, -0.014454086f, 0.014361001f, + -0.023502491f, -0.0011840804f, -0.07607001f, 0.001856849f, + -0.06509276f, -0.006021153f, -0.08570962f, -0.1451793f, // 05 + 0.060212336f, 0.055259194f, 0.06974018f, 0.049454916f, + -0.027794661f, -0.08077226f, -0.016179763f, 0.1169753f, + 0.17213494f, -0.0056326236f, -0.053934924f, -0.0124349f, + -0.11520337f, 0.05409887f, 0.088759385f, 0.0019655675f, // 06 + 0.0042065294f, 0.03881498f, 0.019844765f, 0.041858196f, + -0.05695512f, 0.047233116f, 0.038937137f, -0.06542224f, + 0.014429736f, -0.09719407f, 0.13908425f, -0.05379757f, + 0.012321099f, 0.082840554f, -0.029899208f, 
0.044217527f, // 07 + 0.059855383f, 0.07711018f, -0.045319796f, 0.0948846f, + -0.011724666f, -0.0033288454f, -0.033542685f, -0.04764985f, + -0.13873616f, 0.040668588f, 0.034832682f, -0.015319203f, + -0.018715994f, 0.046002675f, 0.0599172f, -0.043107376f, // 08 + 0.0294216f, -0.002314414f, -0.022424703f, 0.0030315618f, + 0.0014641669f, 0.0029166266f, -0.11878115f, 0.013738511f, + 0.12375372f, -0.0006038222f, 0.029104086f, 0.087442465f, + 0.052958444f, 0.07558703f, 0.04817258f, 0.044462286f, // 09 + -0.015213451f, -0.08783778f, -0.0561384f, -0.003008196f, + 0.047060397f, -0.002058388f, 0.03429439f, -0.018839769f, + 0.024734668f, 0.024614193f, -0.042046934f, 0.09597743f, + -0.0043254104f, 0.04320769f, 0.0064070094f, -0.0019131786f, // 10 + -0.02558259f, -0.022822596f, -0.023273505f, -0.02464396f, + -0.10991725f, -0.006240552f, 0.0074488563f, 0.024044557f, + 0.04383914f, -0.046476185f, 0.028658995f, 0.060410924f, + 0.050786525f, 0.009452605f, -0.0073054377f, -0.024810238f, // 11 + 0.0052906186f, 0.0066939713f, -0.0020913032f, 0.014515517f, + 0.015898481f, 0.021362653f, -0.030262267f, 0.016587038f, + -0.011442813f, 0.041154444f, -0.007631438f, -0.03423484f, + -0.010977775f, 0.036152758f, 0.0066366293f, 0.11915515f, // 12 + 0.02318443f, -0.041350313f, 0.021485701f, -0.10906167f, + -0.028218046f, -0.00954771f, 0.020531068f, -0.11995105f, + -0.03672871f, 0.024019798f, 0.014255957f, -0.05221243f, + -0.00661567f, -0.04630967f, 0.033188973f, 0.10107534f, // 13 + -0.014027541f, 0.030796422f, -0.10270911f, -0.035999842f, + 0.15443139f, 0.07684145f, 0.036571592f, -0.035900835f, + -0.0034699554f, 0.06209149f, 0.015920248f, -0.031122351f, + -0.03858649f, 0.01849943f, 0.13872518f, 0.01503974f, // 14 + 0.069941424f, -0.06948533f, -0.0088794185f, 0.061282158f, + -0.047401894f, 0.03100163f, -0.041533746f, -0.10430945f, + 0.044574402f, -0.01425562f, -0.024290353f, 0.034563623f, + 0.05866852f, 0.023947537f, -0.09445152f, 0.035450947f, // 15 + 0.02247216f, -0.0042998926f, 0.061146557f, -0.10250651f, + 0.020881841f, -0.06747029f, 0.10062043f, -0.0023941975f, + 0.03532124f, -0.016341697f, 0.09685456f, -0.016764693f, + 0.051808182f, 0.05875331f, -0.04536488f, 0.001626336f, // 16 + -0.028892258f, -0.01048663f, -0.009793449f, -0.017093895f, + 0.010987891f, 0.02357273f, -0.00010856845f, 0.0099760275f, + -0.001845119f, -0.03551521f, 0.0018358806f, 0.05763657f, + -0.01769146f, 0.040995963f, 0.02235177f, -0.060430344f, // 17 + 0.11475477f, -0.023854522f, 0.10071741f, 0.0686208f, + -0.014250481f, 0.034261297f, 0.047418304f, 0.08562733f, + -0.030519066f, 0.0060542435f, 0.014653856f, -0.038836084f, + 0.04096551f, 0.032249358f, -0.08355519f, -0.026823482f, // 18 + 0.056386515f, -0.010401743f, -0.028396193f, 0.08507674f, + 0.014410365f, 0.020995233f, 0.17040324f, 0.11511526f, + 0.02459721f, 0.0066619175f, 0.025853224f, -0.023133837f, + -0.081302024f, 0.017264642f, -0.009585969f, 0.09491168f, // 19 + -0.051313367f, 0.054532815f, -0.014298593f, 0.10657464f, + 0.007076659f, 0.10964551f, 0.0409152f, 0.008275321f, + -0.07283536f, 0.07937492f, 0.04192024f, -0.1075027f}; + // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. 
+ hidl_vec recurrentToCellWeightsDimensions({20, 16}); + float recurrentToCellWeightsValue[] = { + -0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f, + 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f, + 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f, + -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f, + 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f, + 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f, + -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f, + -0.019443132f, -0.030755889f, -0.0040000007f, 0.04465846f, + -0.021585021f, 0.0031670958f, 0.0053199246f, -0.056117613f, + -0.10893326f, 0.076739706f, -0.08509834f, -0.027997585f, + 0.037871376f, 0.01449768f, -0.09002357f, -0.06111149f, + -0.046195522f, 0.0422062f, -0.005683705f, -0.1253618f, + -0.012925729f, -0.04890792f, 0.06985068f, 0.037654128f, + 0.03398274f, -0.004781977f, 0.007032333f, -0.031787455f, + 0.010868644f, -0.031489216f, 0.09525667f, 0.013939797f, + 0.0058680447f, 0.0167067f, 0.02668468f, -0.04797466f, + -0.048885044f, -0.12722108f, 0.035304096f, 0.06554885f, + 0.00972396f, -0.039238118f, -0.05159735f, -0.11329045f, + 0.1613692f, -0.03750952f, 0.06529313f, -0.071974665f, + -0.11769596f, 0.015524369f, -0.0013754242f, -0.12446318f, + 0.02786344f, -0.014179351f, 0.005264273f, 0.14376344f, + 0.015983658f, 0.03406988f, -0.06939408f, 0.040699873f, + 0.02111075f, 0.09669095f, 0.041345075f, -0.08316494f, + -0.07684199f, -0.045768797f, 0.032298047f, -0.041805092f, + 0.0119405f, 0.0061010392f, 0.12652606f, 0.0064572375f, + -0.024950314f, 0.11574242f, 0.04508852f, -0.04335324f, + 0.06760663f, -0.027437469f, 0.07216407f, 0.06977076f, + -0.05438599f, 0.034033038f, -0.028602652f, 0.05346137f, + 0.043184172f, -0.037189785f, 0.10420091f, 0.00882477f, + -0.054019816f, -0.074273005f, -0.030617684f, -0.0028467078f, + 0.024302477f, -0.0038869337f, 0.005332455f, 0.0013399826f, + 0.04361412f, -0.007001822f, 0.09631092f, -0.06702025f, + -0.042049985f, -0.035070654f, -0.04103342f, -0.10273396f, + 0.0544271f, 0.037184782f, -0.13150354f, -0.0058036847f, + -0.008264958f, 0.042035464f, 0.05891794f, 0.029673764f, + 0.0063542654f, 0.044788733f, 0.054816857f, 0.062257513f, + -0.00093483756f, 0.048938446f, -0.004952862f, -0.007730018f, + -0.04043371f, -0.017094059f, 0.07229206f, -0.023670016f, + -0.052195564f, -0.025616996f, -0.01520939f, 0.045104615f, + -0.007376126f, 0.003533447f, 0.006570588f, 0.056037236f, + 0.12436656f, 0.051817212f, 0.028532185f, -0.08686856f, + 0.11868599f, 0.07663395f, -0.07323171f, 0.03463402f, + -0.050708205f, -0.04458982f, -0.11590894f, 0.021273347f, + 0.1251325f, -0.15313013f, -0.12224372f, 0.17228661f, + 0.023029093f, 0.086124025f, 0.006445803f, -0.03496501f, + 0.028332196f, 0.04449512f, -0.042436164f, -0.026587414f, + -0.006041347f, -0.09292539f, -0.05678812f, 0.03897832f, + 0.09465633f, 0.008115513f, -0.02171956f, 0.08304309f, + 0.071401566f, 0.019622514f, 0.032163795f, -0.004167056f, + 0.02295182f, 0.030739572f, 0.056506045f, 0.004612461f, + 0.06524936f, 0.059999723f, 0.046395954f, -0.0045512207f, + -0.1335546f, -0.030136576f, 0.11584653f, -0.014678886f, + 0.0020118146f, -0.09688814f, -0.0790206f, 0.039770417f, + -0.0329582f, 0.07922767f, 0.029322514f, 0.026405897f, + 0.04207835f, -0.07073373f, 0.063781224f, 0.0859677f, + -0.10925287f, -0.07011058f, 0.048005477f, 0.03438226f, + -0.09606514f, -0.006669445f, -0.043381985f, 0.04240257f, + -0.06955775f, -0.06769346f, 0.043903265f, -0.026784198f, + -0.017840602f, 0.024307009f, -0.040079936f, -0.019946516f, + 0.045318738f, 
-0.12233574f, 0.026170589f, 0.0074471775f, + 0.15978073f, 0.10185836f, 0.10298046f, -0.015476589f, + -0.039390966f, -0.072174534f, 0.0739445f, -0.1211869f, + -0.0347889f, -0.07943156f, 0.014809798f, -0.12412325f, + -0.0030663363f, 0.039695457f, 0.0647603f, -0.08291318f, + -0.018529687f, -0.004423833f, 0.0037507233f, 0.084633216f, + -0.01514876f, -0.056505352f, -0.012800942f, -0.06994386f, + 0.012962922f, -0.031234352f, 0.07029052f, 0.016418684f, + 0.03618972f, 0.055686004f, -0.08663945f, -0.017404709f, + -0.054761406f, 0.029065743f, 0.052404847f, 0.020238016f, + 0.0048197987f, -0.0214882f, 0.07078733f, 0.013016777f, + 0.06262858f, 0.009184685f, 0.020785125f, -0.043904778f, + -0.0270329f, -0.03299152f, -0.060088247f, -0.015162964f, + -0.001828936f, 0.12642565f, -0.056757294f, 0.013586685f, + 0.09232601f, -0.035886683f, 0.06000002f, 0.05229691f, + -0.052580316f, -0.082029596f, -0.010794592f, 0.012947712f, + -0.036429964f, -0.085508935f, -0.13127148f, -0.017744139f, + 0.031502828f, 0.036232427f, -0.031581745f, 0.023051167f, + -0.05325106f, -0.03421577f, 0.028793324f, -0.034633752f, + -0.009881397f, -0.043551125f, -0.018609839f, 0.0019097115f, + -0.008799762f, 0.056595087f, 0.0022273948f, 0.055752404f}; + // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [num_units, output_size]. + hidl_vec recurrentToOutputWeightsDimensions({20, 16}); + float recurrentToOutputWeightsValue[] = { + 0.025825322f, -0.05813119f, 0.09495884f, -0.045984812f, + -0.01255415f, -0.0026479573f, -0.08196161f, -0.054914974f, + -0.0046604523f, -0.029587349f, -0.044576716f, -0.07480124f, + -0.082868785f, 0.023254942f, 0.027502948f, -0.0039728214f, + -0.08683098f, -0.08116779f, -0.014675607f, -0.037924774f, + -0.023314456f, -0.007401714f, -0.09255757f, 0.029460307f, + -0.08829125f, -0.005139627f, -0.08989442f, -0.0555066f, + 0.13596267f, -0.025062224f, -0.048351806f, -0.03850004f, + 0.07266485f, -0.022414139f, 0.05940088f, 0.075114764f, + 0.09597592f, -0.010211725f, -0.0049794707f, -0.011523867f, + -0.025980417f, 0.072999895f, 0.11091378f, -0.081685916f, + 0.014416728f, 0.043229222f, 0.034178585f, -0.07530371f, + 0.035837382f, -0.085607f, -0.007721233f, -0.03287832f, + -0.043848954f, -0.06404588f, -0.06632928f, -0.073643476f, + 0.008214239f, -0.045984086f, 0.039764922f, 0.03474462f, + 0.060612556f, -0.080590084f, 0.049127717f, 0.04151091f, + -0.030063879f, 0.008801774f, -0.023021035f, -0.019558564f, + 0.05158114f, -0.010947698f, -0.011825728f, 0.0075720972f, + 0.0699727f, -0.0039981045f, 0.069350146f, 0.08799282f, + 0.016156472f, 0.035502106f, 0.11695009f, 0.006217345f, + 0.13392477f, -0.037875112f, 0.025745004f, 0.08940699f, + -0.00924166f, 0.0046702605f, -0.036598757f, -0.08811812f, + 0.10522024f, -0.032441203f, 0.008176899f, -0.04454919f, + 0.07058152f, 0.0067963637f, 0.039206743f, 0.03259838f, + 0.03725492f, -0.09515802f, 0.013326398f, -0.052055415f, + -0.025676316f, 0.03198509f, -0.015951829f, -0.058556724f, + 0.036879618f, 0.043357447f, 0.028362012f, -0.05908629f, + 0.0059240665f, -0.04995891f, -0.019187413f, 0.0276265f, + -0.01628143f, 0.0025863599f, 0.08800015f, 0.035250366f, + -0.022165963f, -0.07328642f, -0.009415526f, -0.07455109f, + 0.11690406f, 0.0363299f, 0.07411125f, 0.042103454f, + -0.009660886f, 0.019076364f, 0.018299393f, -0.046004917f, + 0.08891175f, 0.0431396f, -0.026327137f, -0.051502608f, + 0.08979574f, -0.051670972f, 0.04940282f, -0.07491107f, + -0.021240504f, 0.022596184f, -0.034280192f, 0.060163025f, + -0.058211457f, -0.051837247f, -0.01349775f, 
-0.04639988f, + -0.035936575f, -0.011681591f, 0.064818054f, 0.0073146066f, + -0.021745546f, -0.043124277f, -0.06471268f, -0.07053354f, + -0.029321948f, -0.05330136f, 0.016933719f, -0.053782392f, + 0.13747959f, -0.1361751f, -0.11569455f, 0.0033329215f, + 0.05693899f, -0.053219706f, 0.063698f, 0.07977434f, + -0.07924483f, 0.06936997f, 0.0034815092f, -0.007305279f, + -0.037325785f, -0.07251102f, -0.033633437f, -0.08677009f, + 0.091591336f, -0.14165086f, 0.021752775f, 0.019683983f, + 0.0011612234f, -0.058154266f, 0.049996935f, 0.0288841f, + -0.0024567875f, -0.14345716f, 0.010955264f, -0.10234828f, + 0.1183656f, -0.0010731248f, -0.023590032f, -0.072285876f, + -0.0724771f, -0.026382286f, -0.0014920527f, 0.042667855f, + 0.0018776858f, 0.02986552f, 0.009814309f, 0.0733756f, + 0.12289186f, 0.018043943f, -0.0458958f, 0.049412545f, + 0.033632483f, 0.05495232f, 0.036686596f, -0.013781798f, + -0.010036754f, 0.02576849f, -0.08307328f, 0.010112348f, + 0.042521734f, -0.05869831f, -0.071689695f, 0.03876447f, + -0.13275425f, -0.0352966f, -0.023077697f, 0.10285965f, + 0.084736146f, 0.15568255f, -0.00040734606f, 0.027835453f, + -0.10292561f, -0.032401145f, 0.10053256f, -0.026142767f, + -0.08271222f, -0.0030240538f, -0.016368777f, 0.1070414f, + 0.042672627f, 0.013456989f, -0.0437609f, -0.022309763f, + 0.11576483f, 0.04108048f, 0.061026827f, -0.0190714f, + -0.0869359f, 0.037901703f, 0.0610107f, 0.07202949f, + 0.01675338f, 0.086139716f, -0.08795751f, -0.014898893f, + -0.023771819f, -0.01965048f, 0.007955471f, -0.043740474f, + 0.03346837f, -0.10549954f, 0.090567775f, 0.042013682f, + -0.03176985f, 0.12569028f, -0.02421228f, -0.029526481f, + 0.023851605f, 0.031539805f, 0.05292009f, -0.02344001f, + -0.07811758f, -0.08834428f, 0.10094801f, 0.16594367f, + -0.06861939f, -0.021256343f, -0.041093912f, -0.06669611f, + 0.035498552f, 0.021757556f, -0.09302526f, -0.015403468f, + -0.06614931f, -0.051798206f, -0.013874718f, 0.03630673f, + 0.010412845f, -0.08077351f, 0.046185967f, 0.0035662893f, + 0.03541868f, -0.094149634f, -0.034814864f, 0.003128424f, + -0.020674974f, -0.03944324f, -0.008110165f, -0.11113267f, + 0.08484226f, 0.043586485f, 0.040582247f, 0.0968012f, + -0.065249965f, -0.028036479f, 0.0050708856f, 0.0017462453f, + 0.0326779f, 0.041296225f, 0.09164146f, -0.047743853f, + -0.015952192f, -0.034451712f, 0.084197424f, -0.05347844f, + -0.11768019f, 0.085926116f, -0.08251791f, -0.045081906f, + 0.0948852f, 0.068401024f, 0.024856757f, 0.06978981f, + -0.057309967f, -0.012775832f, -0.0032452994f, 0.01977615f, + -0.041040014f, -0.024264973f, 0.063464895f, 0.05431621f}; + // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToInputWeightsDimensions({20}); + float cellToInputWeightsValue[] = {0.040369894f, 0.030746894f, 0.24704495f, 0.018586371f, -0.037586458f, + -0.15312155f, -0.11812848f, -0.11465643f, 0.20259799f, 0.11418174f, + -0.10116027f, -0.011334949f, 0.12411352f, -0.076769054f, -0.052169047f, + 0.21198851f, -0.38871562f, -0.09061183f, -0.09683246f, -0.21929175f}; + // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. 
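+ // Note: this and the other optional cell-to-gate vectors (operands 09 and 11) are the
+ // peephole connections; each gate reads only its own cell-state element (a diagonal
+ // weighting), which is why a 1-D vector of num_units values suffices instead of a full matrix.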
+ hidl_vec cellToForgetWeightsDimensions({20}); + float cellToForgetWeightsValue[] = {-0.01998659f, -0.15568835f, -0.24248174f, -0.012770197f, 0.041331276f, + -0.072311886f, -0.052123554f, -0.0066330447f, -0.043891653f, 0.036225766f, + -0.047248036f, 0.021479502f, 0.033189066f, 0.11952997f, -0.020432774f, + 0.64658105f, -0.06650122f, -0.03467612f, 0.095340036f, 0.23647355f}; + // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellToOutputWeightsDimensions({20}); + float cellToOutputWeightsValue[] = {0.08286371f, -0.08261836f, -0.51210177f, 0.002913762f, 0.17764764f, + -0.5495371f, -0.08460716f, -0.24552552f, 0.030037103f, 0.04123544f, + -0.11940523f, 0.007358328f, 0.1890978f, 0.4833202f, -0.34441817f, + 0.36312827f, -0.26375428f, 0.1457655f, -0.19724406f, 0.15548733f}; + // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec inputGateBiasDimensions({20}); + float inputGateBiasValue[] = {0.02234832f, 0.14757581f, 0.18176508f, 0.10380666f, 0.053110216f, + -0.06928846f, -0.13942584f, -0.11816189f, 0.19483899f, 0.03652339f, + -0.10250295f, 0.036714908f, -0.18426876f, 0.036065217f, 0.21810818f, + 0.02383196f, -0.043370757f, 0.08690144f, -0.04444982f, 0.00030581196f}; + // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec forgetGateBiasDimensions({20}); + float forgetGateBiasValue[] = {0.035185695f, -0.042891346f, -0.03032477f, 0.23027696f, 0.11098921f, + 0.15378423f, 0.09263801f, 0.09790885f, 0.09508917f, 0.061199076f, + 0.07665568f, -0.015443159f, -0.03499149f, 0.046190713f, 0.08895977f, + 0.10899629f, 0.40694186f, 0.06030037f, 0.012413437f, -0.06108739f}; + // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec cellBiasDimensions({20}); + float cellBiasValue[] = {-0.024379363f, 0.0055531194f, 0.23377132f, 0.033463873f, -0.1483596f, + -0.10639995f, -0.091433935f, 0.058573797f, -0.06809782f, -0.07889636f, + -0.043246906f, -0.09829136f, -0.4279842f, 0.034901652f, 0.18797937f, + 0.0075234566f, 0.016178843f, 0.1749513f, 0.13975595f, 0.92058027f}; + // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units]. + hidl_vec outputGateBiasDimensions({20}); + float outputGateBiasValue[] = {0.046159424f, -0.0012809046f, 0.03563469f, 0.12648113f, 0.027195795f, + 0.35373217f, -0.018957434f, 0.008907322f, -0.0762701f, 0.12018895f, + 0.04216877f, 0.0022856654f, 0.040952638f, 0.3147856f, 0.08225149f, + -0.057416286f, -0.14995944f, -0.008040261f, 0.13208859f, 0.029760877f}; + // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape + // [output_size, num_units]. 
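+ // Note: the projection layer is what lets output_size (16) differ from num_units (20);
+ // informally, output = clip(projection_weights * gated_cell_output + projection_bias).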
+ hidl_vec projectionWeightsDimensions({16, 20}); + float projectionWeightsValue[] = { + -0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f, + 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f, + -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f, + -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f, + 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f, + 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f, + 0.08682067f, 0.17240396f, 0.014975425f, 0.056431185f, 0.031037588f, + 0.16702051f, 0.0077946745f, 0.15140012f, 0.29405436f, 0.120285f, + -0.188994f, -0.027265169f, 0.043389652f, -0.022061434f, 0.014777949f, + -0.20203483f, 0.094781205f, 0.19100232f, 0.13987629f, -0.036132768f, + -0.06426278f, -0.05108664f, 0.13221376f, 0.009441198f, -0.16715929f, + 0.15859416f, -0.040437475f, 0.050779544f, -0.022187516f, 0.012166504f, + 0.027685808f, -0.07675938f, -0.0055694645f, -0.09444123f, 0.0046453946f, + 0.050794356f, 0.10770313f, -0.20790008f, -0.07149004f, -0.11425117f, + 0.008225835f, -0.035802525f, 0.14374903f, 0.15262283f, 0.048710253f, + 0.1847461f, -0.007487823f, 0.11000021f, -0.09542012f, 0.22619456f, + -0.029149994f, 0.08527916f, 0.009043713f, 0.0042746216f, 0.016261552f, + 0.022461696f, 0.12689082f, -0.043589946f, -0.12035478f, -0.08361797f, + -0.050666027f, -0.1248618f, -0.1275799f, -0.071875185f, 0.07377272f, + 0.09944291f, -0.18897448f, -0.1593054f, -0.06526116f, -0.040107165f, + -0.004618631f, -0.067624845f, -0.007576253f, 0.10727444f, 0.041546922f, + -0.20424393f, 0.06907816f, 0.050412357f, 0.00724631f, 0.039827548f, + 0.12449835f, 0.10747581f, 0.13708383f, 0.09134148f, -0.12617786f, + -0.06428341f, 0.09956831f, 0.1208086f, -0.14676677f, -0.0727722f, + 0.1126304f, 0.010139365f, 0.015571211f, -0.038128063f, 0.022913318f, + -0.042050496f, 0.16842307f, -0.060597885f, 0.10531834f, -0.06411776f, + -0.07451711f, -0.03410368f, -0.13393489f, 0.06534304f, 0.003620307f, + 0.04490757f, 0.05970546f, 0.05197996f, 0.02839995f, 0.10434969f, + -0.013699693f, -0.028353551f, -0.07260381f, 0.047201227f, -0.024575593f, + -0.036445823f, 0.07155557f, 0.009672501f, -0.02328883f, 0.009533515f, + -0.03606021f, -0.07421458f, -0.028082801f, -0.2678904f, -0.13221288f, + 0.18419984f, -0.13012612f, -0.014588381f, -0.035059117f, -0.04824723f, + 0.07830115f, -0.056184657f, 0.03277091f, 0.025466874f, 0.14494097f, + -0.12522776f, -0.098633975f, -0.10766018f, -0.08317623f, 0.08594209f, + 0.07749552f, 0.039474737f, 0.1776665f, -0.07409566f, -0.0477268f, + 0.29323658f, 0.10801441f, 0.1154011f, 0.013952499f, 0.10739139f, + 0.10708251f, -0.051456142f, 0.0074137426f, -0.10430189f, 0.10034707f, + 0.045594677f, 0.0635285f, -0.0715442f, -0.089667566f, -0.10811871f, + 0.00026344223f, 0.08298446f, -0.009525053f, 0.006585689f, -0.24567553f, + -0.09450807f, 0.09648481f, 0.026996298f, -0.06419476f, -0.04752702f, + -0.11063944f, -0.23441927f, -0.17608605f, -0.052156363f, 0.067035615f, + 0.19271925f, -0.0032889997f, -0.043264326f, 0.09663576f, -0.057112187f, + -0.10100678f, 0.0628376f, 0.04447668f, 0.017961001f, -0.10094388f, + -0.10190601f, 0.18335468f, 0.10494553f, -0.052095775f, -0.0026118709f, + 0.10539724f, -0.04383912f, -0.042349473f, 0.08438151f, -0.1947263f, + 0.02251204f, 0.11216432f, -0.10307853f, 0.17351969f, -0.039091777f, + 0.08066188f, -0.00561982f, 0.12633002f, 0.11335965f, -0.0088127935f, + -0.019777594f, 0.06864014f, -0.059751723f, 0.016233567f, -0.06894641f, + -0.28651384f, -0.004228674f, 0.019708522f, 
-0.16305895f, -0.07468996f, + -0.0855457f, 0.099339016f, -0.07580735f, -0.13775392f, 0.08434318f, + 0.08330512f, -0.12131499f, 0.031935584f, 0.09180414f, -0.08876437f, + -0.08049874f, 0.008753825f, 0.03498998f, 0.030215185f, 0.03907079f, + 0.089751154f, 0.029194152f, -0.03337423f, -0.019092513f, 0.04331237f, + 0.04299654f, -0.036394123f, -0.12915532f, 0.09793732f, 0.07512415f, + -0.11319543f, -0.032502122f, 0.15661901f, 0.07671967f, -0.005491124f, + -0.19379048f, -0.218606f, 0.21448623f, 0.017840758f, 0.1416943f, + -0.07051762f, 0.19488361f, 0.02664691f, -0.18104725f, -0.09334311f, + 0.15026465f, -0.15493552f, -0.057762887f, -0.11604192f, -0.262013f, + -0.01391798f, 0.012185008f, 0.11156489f, -0.07483202f, 0.06693364f, + -0.26151478f, 0.046425626f, 0.036540434f, -0.16435726f, 0.17338543f, + -0.21401681f, -0.11385144f, -0.08283257f, -0.069031075f, 0.030635102f, + 0.010969227f, 0.11109743f, 0.010919218f, 0.027526086f, 0.13519906f, + 0.01891392f, -0.046839405f, -0.040167913f, 0.017953383f, -0.09700955f, + 0.0061885654f, -0.07000971f, 0.026893595f, -0.038844477f, 0.14543656f}; + // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size]. + hidl_vec projectionBiasDimensions({0}); + float projectionBiasValue[] = {}; + + // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateInDimensions({2, 16}); + std::vector outputStateInValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateInDimensions({2, 20}); + std::vector cellStateInValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + // constant scalar values (the VTS test adds these as tensors of dim {}) + // 20: The activation function: A value indicating the activation function: + // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid. + hidl_vec activationFunctionDimensions({}); + int32_t activationFunctionValue[] = {4}; + // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip]. + // If set to 0.0 then clipping is disabled. + hidl_vec cellClippingThresholdDimensions({}); + float cellClippingThresholdValue[] = {0.0f}; + // 22: The clipping threshold: for the output from the projection layer, such that values are bound within + // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. + hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({2, 60}); + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. 
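+ // Note: per the operand descriptions, output 3 ("The output") is effectively a copy of this
+ // output state, so the two expected arrays below agree to within rounding in the last
+ // printed digit.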
+ hidl_vec outputStateOutDimensions({2, 16}); + std::vector outputStateOutValue { + -0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835577f, -0.0211779f, 0.0283512f, -0.0114597f, + 0.00907307f, -0.0244004f, -0.0152191f, -0.0259063f, 0.00914318f, 0.00415119f, 0.017147f, 0.0134203f, + -0.013869f, 0.0287268f, -0.00334694f, 0.00733397f, -0.0287926f, -0.0186926f, 0.0193662f, -0.0115437f, + 0.00422612f, -0.0345232f, 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, 0.0216801f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({2, 20}); + std::vector cellStateOutValue { + -0.0531632f, -0.0118138f, 0.0870833f, 0.0347929f, -0.076144f, + -0.0659219f, -0.0463811f, 0.0141307f, -0.0127706f, -0.03782f, + -0.00402401f, -0.00571876f, -0.187957f, -0.0247127f, 0.0711425f, + 0.008244f, 0.0492649f, 0.126972f, 0.0933097f, 0.29848f, + -0.0966178f, -0.114417f, 0.0387229f, 0.0453255f, -0.181286f, + -0.0651251f, -0.0996879f, -0.00276995f, 0.0617558f, -0.0100728f, + 0.056304f, -0.077416f, -0.162858f, -0.0541251f, 0.0571202f, + -0.0525331f, 0.0724297f, 0.171029f, 0.141738f, 0.295483f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + hidl_vec outputDimensions({2, 16}); + std::vector outputValue { + -0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835576f, -0.0211779f, 0.0283512f, -0.0114597f, + 0.00907307f, -0.0244004f, -0.0152191f, -0.0259063f, 0.00914318f, 0.00415118f, 0.017147f, 0.0134203f, + -0.013869f, 0.0287268f, -0.00334693f, 0.00733398f, -0.0287926f, -0.0186926f, 0.0193662f, -0.0115437f, + 0.00422612f, -0.0345232f, 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, 0.02168f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2) +{ + // This replicates android/frameworks/ml/nn/runtime/test/generated/vts_models/lstm2.model.cpp + // with values from 
android/frameworks/ml/nn/runtime/test/generated/examples/lstm2.example.cpp
+ // and weights, biases and scalars passed as CONSTANT_COPY tensors (instead of MODEL_INPUT tensors).
+ // The batch size has been increased to 2 (it was 1 in the VTS test) with appropriate input and output values added.
+
+ uint32_t batchSize = 2;
+ uint32_t inputSize = 2;
+ uint32_t numUnits = 4;
+ uint32_t outputSize = numUnits;
+
+ // Inputs:
+ // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec inputDimensions({batchSize, inputSize});
+ std::vector inputValue {2.0f, 3.0f, 3.0f, 4.0f};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec inputToInputWeightsDimensions({0});
+ float inputToInputWeightsValue[] = {};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec inputToForgetWeightsDimensions({numUnits, inputSize});
+ float inputToForgetWeightsValue[] = {-0.55291498f, -0.42866567f,
+                                       0.13056988f, -0.36333650f,
+                                      -0.22755712f,  0.28253698f,
+                                       0.24407166f,  0.33826375f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec inputToCellWeightsDimensions({numUnits, inputSize});
+ float inputToCellWeightsValue[] = {-0.49770179f, -0.27711356f,
+                                    -0.09624726f,  0.05100781f,
+                                     0.04717243f,  0.48944736f,
+                                    -0.38535351f, -0.17212132f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec inputToOutputWeightsDimensions({numUnits, inputSize});
+ float inputToOutputWeightsValue[] = { 0.10725588f, -0.02335852f,
+                                      -0.55932593f, -0.09426838f,
+                                      -0.44257352f,  0.54939759f,
+                                       0.01533556f,  0.42751634f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ // Note: the VTS test declares this operand with dimensions {4, 4}, but a CIFG network has no
+ // input gate, so its recurrent weights are passed as an empty ({0}) tensor here.
+ hidl_vec recurrentToInputWeightsDimensions({0});
+ float recurrentToInputWeightsValue[] = {};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec recurrentToForgetWeightsDimensions({numUnits, outputSize});
+ float recurrentToForgetWeightsValue[] = {-0.13832897f, -0.05151010f, -0.23590070f, -0.16661474f,
+                                          -0.14340827f,  0.36986142f,  0.23414481f,  0.55899000f,
+                                           0.10798943f, -0.41174671f,  0.17751795f, -0.34484994f,
+                                          -0.35874045f, -0.11352962f,  0.27268326f,  0.54058349f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec recurrentToCellWeightsDimensions({numUnits, outputSize});
+ float recurrentToCellWeightsValue[] = { 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f,
+                                         0.42957711f,  0.01841056f, -0.32764608f, -0.33027974f,
+                                        -0.10826075f,  0.20675004f,  0.19069612f, -0.03026325f,
+                                        -0.54532051f,  0.33003211f,  0.44901288f,  0.21193194f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec recurrentToOutputWeightsDimensions({numUnits, outputSize});
+ float recurrentToOutputWeightsValue[] = { 0.41613156f,  0.42610586f, -0.16495961f, -0.56638730f,
+                                           0.30579174f, -0.05115908f, -0.33941799f,  0.23364776f,
+                                           0.11178309f,  0.09481031f, -0.26424935f,  0.46261835f,
+                                           0.50248802f,  0.26114327f, -0.43736315f,  0.33149987f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellToInputWeightsDimensions({0});
+ float cellToInputWeightsValue[] = {};
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellToForgetWeightsDimensions({numUnits});
+ float cellToForgetWeightsValue[] = {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellToOutputWeightsDimensions({numUnits});
+ float cellToOutputWeightsValue[] = {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ // Note: the VTS test declares this operand with dimensions {4}, but a CIFG network has no
+ // input gate, so its bias is passed as an empty ({0}) tensor here.
+ hidl_vec inputGateBiasDimensions({0});
+ float inputGateBiasValue[] = {};
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec forgetGateBiasDimensions({numUnits});
+ float forgetGateBiasValue[] = {1.0f, 1.0f, 1.0f, 1.0f};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec cellBiasDimensions({numUnits});
+ float cellBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec outputGateBiasDimensions({numUnits});
+ float outputGateBiasValue[] = {0.0f, 0.0f, 0.0f, 0.0f};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec projectionWeightsDimensions({0});
+ float projectionWeightsValue[] = {};
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec projectionBiasDimensions({0});
+ float projectionBiasValue[] = {};
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec outputStateInDimensions({batchSize, outputSize});
+ std::vector outputStateInValue {0, 0, 0, 0, 0, 0, 0, 0};
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec cellStateInDimensions({batchSize, numUnits});
+ std::vector cellStateInValue {0, 0, 0, 0, 0, 0, 0, 0};
+
+ // constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec activationFunctionDimensions({});
+ int32_t activationFunctionValue[] = {4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec cellClippingThresholdDimensions({});
+ float cellClippingThresholdValue[] = {0.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
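+ // Note: clipping with a threshold t bounds a value roughly as std::min(std::max(x, -t), t);
+ // both thresholds in this test are 0.0f, so clipping is disabled (and this CIFG variant has
+ // no projection layer in any case).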
+ hidl_vec projectionClippingThresholdDimensions({}); + float projectionClippingThresholdValue[] = {0.0f}; + + // Outputs: + // 0: The scratch buffer: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units * 4] with + // CIFG, or [batch_size, num_units * 3] without CIFG. + hidl_vec scratchBufferDimensions({batchSize, numUnits * 4}); + std::vector scratchBufferValue {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + // 1: The output state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. + hidl_vec outputStateOutDimensions({batchSize, outputSize}); + std::vector outputStateOutValue {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, + -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f}; + // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units]. + hidl_vec cellStateOutDimensions({batchSize, numUnits}); + std::vector cellStateOutValue {-0.76044439f, -0.01804161f, 0.18226376f, -0.06493707f, + -0.90477051f, -0.04355603f, 0.18475688f, -0.04158677f}; + // 3: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size]. This is + // effectively the same as the current “output state (out)” value. + hidl_vec outputDimensions({batchSize, outputSize}); + std::vector outputValue {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, + -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f}; + + LstmTestImpl(inputDimensions, inputValue, + inputToInputWeightsDimensions, inputToInputWeightsValue, + inputToForgetWeightsDimensions, inputToForgetWeightsValue, + inputToCellWeightsDimensions, inputToCellWeightsValue, + inputToOutputWeightsDimensions, inputToOutputWeightsValue, + recurrentToInputWeightsDimensions, recurrentToInputWeightsValue, + recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue, + recurrentToCellWeightsDimensions, recurrentToCellWeightsValue, + recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue, + cellToInputWeightsDimensions, cellToInputWeightsValue, + cellToForgetWeightsDimensions, cellToForgetWeightsValue, + cellToOutputWeightsDimensions, cellToOutputWeightsValue, + inputGateBiasDimensions, inputGateBiasValue, + forgetGateBiasDimensions, forgetGateBiasValue, + cellBiasDimensions, cellBiasValue, + outputGateBiasDimensions, outputGateBiasValue, + projectionWeightsDimensions, projectionWeightsValue, + projectionBiasDimensions, projectionBiasValue, + outputStateInDimensions, outputStateInValue, + cellStateInDimensions, cellStateInValue, + activationFunctionDimensions, activationFunctionValue, + cellClippingThresholdDimensions, cellClippingThresholdValue, + projectionClippingThresholdDimensions, projectionClippingThresholdValue, + scratchBufferDimensions, scratchBufferValue, + outputStateOutDimensions, outputStateOutValue, + cellStateOutDimensions, cellStateOutValue, + outputDimensions, outputValue); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/Merger.cpp b/test/Merger.cpp index 48253604..aeaff0cb 100644 --- a/test/Merger.cpp +++ b/test/Merger.cpp @@ -4,28 +4,33 @@ // #include "DriverTestHelpers.hpp" #include "TestTensor.hpp" +#include #include +#include #include BOOST_AUTO_TEST_SUITE(MergerTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; namespace { +static const boost::array COMPUTE_DEVICES = 
{{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }}; + void MergerTestImpl(const std::vector & inputs, int32_t concatAxis, const TestTensor & expectedOutputTensor, + armnn::Compute computeDevice, ErrorStatus expectedPrepareStatus=ErrorStatus::NONE, ErrorStatus expectedExecStatus=ErrorStatus::NONE) { - std::unique_ptr driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); - V1_0::Model model{}; + std::unique_ptr driver = std::make_unique(DriverOptions(computeDevice)); + neuralnetworks::V1_0::Model model{}; hidl_vec modelInputIds; modelInputIds.resize(inputs.size()+1); @@ -40,7 +45,7 @@ MergerTestImpl(const std::vector & inputs, // make the concat operation model.operations.resize(1); - model.operations[0].type = V1_0::OperationType::CONCATENATION; + model.operations[0].type = neuralnetworks::V1_0::OperationType::CONCATENATION; model.operations[0].inputs = modelInputIds; model.operations[0].outputs = hidl_vec{static_cast(inputs.size()+1)}; @@ -130,7 +135,8 @@ MergerTestImpl(const std::vector & inputs, } // namespace -BOOST_AUTO_TEST_CASE(SimpleConcatAxis0) + +BOOST_DATA_TEST_CASE(SimpleConcatAxis0, COMPUTE_DEVICES) { int32_t axis = 0; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -139,10 +145,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis0) TestTensor expected{armnn::TensorShape{3,1,1,1},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(ConcatAxis0_NoInterleave) +BOOST_DATA_TEST_CASE(ConcatAxis0_NoInterleave, COMPUTE_DEVICES) { int32_t axis = 0; TestTensor aIn{armnn::TensorShape{2,1,2,1},{0, 1, @@ -159,10 +165,10 @@ BOOST_AUTO_TEST_CASE(ConcatAxis0_NoInterleave) 8, 9, 10, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis1) +BOOST_DATA_TEST_CASE(SimpleConcatAxis1, COMPUTE_DEVICES) { int32_t axis = 1; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -171,10 +177,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis1) TestTensor expected{armnn::TensorShape{1,3,1,1},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(ConcatAxis1_NoInterleave) +BOOST_DATA_TEST_CASE(ConcatAxis1_NoInterleave, COMPUTE_DEVICES) { int32_t axis = 1; TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1, @@ -191,10 +197,10 @@ BOOST_AUTO_TEST_CASE(ConcatAxis1_NoInterleave) 8, 9, 10, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis1_DoInterleave) +BOOST_DATA_TEST_CASE(SimpleConcatAxis1_DoInterleave, COMPUTE_DEVICES) { int32_t axis = 1; TestTensor aIn{armnn::TensorShape{2,2,1,1},{0, 1, @@ -207,10 +213,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis1_DoInterleave) TestTensor expected{armnn::TensorShape{2,6,1,1},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis2) +BOOST_DATA_TEST_CASE(SimpleConcatAxis2, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -219,10 +225,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis2) TestTensor expected{armnn::TensorShape{1,1,3,1},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } 
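The hunks above and below apply one mechanical change throughout: each BOOST_AUTO_TEST_CASE becomes a BOOST_DATA_TEST_CASE parameterised over COMPUTE_DEVICES, so every concatenation test runs once per backend and receives the current backend through Boost.Test's default dataset variable, `sample`. A minimal self-contained sketch of that pattern (the enum, operator<< and test name below are hypothetical stand-ins, not armnn code, and it assumes the file is compiled into an existing Boost.Test module, as test/Merger.cpp is):

#include <boost/array.hpp>
#include <boost/test/unit_test.hpp>
#include <boost/test/data/test_case.hpp>
#include <ostream>

namespace
{
// Hypothetical stand-in for armnn::Compute, to keep the sketch self-contained.
enum class Compute { CpuRef, GpuAcc };

// Streaming operator so Boost.Test can log which sample a failing run used.
std::ostream& operator<<(std::ostream& os, Compute c)
{
    return os << (c == Compute::CpuRef ? "CpuRef" : "GpuAcc");
}

const boost::array<Compute, 2> COMPUTE_DEVICES = {{ Compute::CpuRef, Compute::GpuAcc }};
} // namespace

// Instantiated once per element of COMPUTE_DEVICES; the element is exposed as 'sample'
// because BOOST_DATA_TEST_CASE was given no explicit variable name.
BOOST_DATA_TEST_CASE(RunsOncePerBackend, COMPUTE_DEVICES)
{
    BOOST_TEST((sample == Compute::CpuRef || sample == Compute::GpuAcc));
}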
-BOOST_AUTO_TEST_CASE(ConcatAxis2_NoInterleave) +BOOST_DATA_TEST_CASE(ConcatAxis2_NoInterleave, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -239,10 +245,10 @@ BOOST_AUTO_TEST_CASE(ConcatAxis2_NoInterleave) 8, 9, 10, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis2_DoInterleave) +BOOST_DATA_TEST_CASE(SimpleConcatAxis2_DoInterleave, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1, @@ -255,10 +261,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis2_DoInterleave) TestTensor expected{armnn::TensorShape{1,2,6,1},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis3) +BOOST_DATA_TEST_CASE(SimpleConcatAxis3, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -267,10 +273,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis3) TestTensor expected{armnn::TensorShape{1,1,1,3},{0,1,2}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(SimpleConcatAxis3_DoInterleave) +BOOST_DATA_TEST_CASE(SimpleConcatAxis3_DoInterleave, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -283,10 +289,10 @@ BOOST_AUTO_TEST_CASE(SimpleConcatAxis3_DoInterleave) TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } -BOOST_AUTO_TEST_CASE(AxisTooBig) +BOOST_DATA_TEST_CASE(AxisTooBig, COMPUTE_DEVICES) { int32_t axis = 4; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -296,10 +302,10 @@ BOOST_AUTO_TEST_CASE(AxisTooBig) // see: https://www.tensorflow.org/api_docs/python/tf/concat TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}}; ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, expectedParserStatus); + MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(AxisTooSmall) +BOOST_DATA_TEST_CASE(AxisTooSmall, COMPUTE_DEVICES) { int32_t axis = -5; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; @@ -309,20 +315,20 @@ BOOST_AUTO_TEST_CASE(AxisTooSmall) // see: https://www.tensorflow.org/api_docs/python/tf/concat TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}}; ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, expectedParserStatus); + MergerTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(TooFewInputs) +BOOST_DATA_TEST_CASE(TooFewInputs, COMPUTE_DEVICES) { int32_t axis = 0; TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}}; // We need at least two tensors to concatenate ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn}, axis, aIn, expectedParserStatus); + MergerTestImpl({&aIn}, axis, aIn, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedInputDimensions) +BOOST_DATA_TEST_CASE(MismatchedInputDimensions, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -336,10 +342,10 @@ BOOST_AUTO_TEST_CASE(MismatchedInputDimensions) // The input dimensions must 
be compatible ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn, &mismatched}, axis, expected, expectedParserStatus); + MergerTestImpl({&aIn, &bIn, &mismatched}, axis, expected, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedInputRanks) +BOOST_DATA_TEST_CASE(MismatchedInputRanks, COMPUTE_DEVICES) { int32_t axis = 2; TestTensor aIn{armnn::TensorShape{1,1,2},{0,1}}; @@ -348,10 +354,10 @@ BOOST_AUTO_TEST_CASE(MismatchedInputRanks) // The input dimensions must be compatible ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn}, axis, expected, expectedParserStatus); + MergerTestImpl({&aIn, &bIn}, axis, expected, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedOutputDimensions) +BOOST_DATA_TEST_CASE(MismatchedOutputDimensions, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -366,10 +372,10 @@ BOOST_AUTO_TEST_CASE(MismatchedOutputDimensions) // The input and output dimensions must be compatible ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, expectedParserStatus); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(MismatchedOutputRank) +BOOST_DATA_TEST_CASE(MismatchedOutputRank, COMPUTE_DEVICES) { int32_t axis = 3; TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1, @@ -384,10 +390,10 @@ BOOST_AUTO_TEST_CASE(MismatchedOutputRank) // The input and output ranks must match ErrorStatus expectedParserStatus = ErrorStatus::GENERAL_FAILURE; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, expectedParserStatus); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus); } -BOOST_AUTO_TEST_CASE(ValidNegativeAxis) +BOOST_DATA_TEST_CASE(ValidNegativeAxis, COMPUTE_DEVICES) { // this is the same as 3 // see: https://www.tensorflow.org/api_docs/python/tf/concat @@ -402,7 +408,79 @@ BOOST_AUTO_TEST_CASE(ValidNegativeAxis) TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10, 2, 3, 7, 8, 9, 11}}; - MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected); + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisZero3D, COMPUTE_DEVICES) +{ + int32_t axis = 0; + TestTensor aIn{armnn::TensorShape{1,1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1,1},{2}}; + + TestTensor expected{armnn::TensorShape{3,1,1},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisOne3D, COMPUTE_DEVICES) +{ + int32_t axis = 1; + TestTensor aIn{armnn::TensorShape{1,1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1,1},{2}}; + + TestTensor expected{armnn::TensorShape{1,3,1},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisTwo3D, COMPUTE_DEVICES) +{ + int32_t axis = 2; + TestTensor aIn{armnn::TensorShape{1,1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1,1},{2}}; + + TestTensor expected{armnn::TensorShape{1,1,3},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisZero2D, COMPUTE_DEVICES) +{ + int32_t axis = 0; + TestTensor aIn{armnn::TensorShape{1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1},{1}}; + TestTensor 
cIn{armnn::TensorShape{1,1},{2}}; + + TestTensor expected{armnn::TensorShape{3,1},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisOne2D, COMPUTE_DEVICES) +{ + int32_t axis = 1; + TestTensor aIn{armnn::TensorShape{1,1},{0}}; + TestTensor bIn{armnn::TensorShape{1,1},{1}}; + TestTensor cIn{armnn::TensorShape{1,1},{2}}; + + TestTensor expected{armnn::TensorShape{1,3},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); +} + +BOOST_DATA_TEST_CASE(SimpleConcatAxisZero1D, COMPUTE_DEVICES) +{ + int32_t axis = 0; + TestTensor aIn{armnn::TensorShape{1},{0}}; + TestTensor bIn{armnn::TensorShape{1},{1}}; + TestTensor cIn{armnn::TensorShape{1},{2}}; + + TestTensor expected{armnn::TensorShape{3},{0,1,2}}; + + MergerTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample); } BOOST_AUTO_TEST_SUITE_END() diff --git a/test/Tests.cpp b/test/Tests.cpp index 3fa8e125..df98b2ca 100644 --- a/test/Tests.cpp +++ b/test/Tests.cpp @@ -11,9 +11,9 @@ BOOST_AUTO_TEST_SUITE(DriverTests) -using ArmnnDriver = armnn_driver::ArmnnDriver; -using DriverOptions = armnn_driver::DriverOptions; +using namespace android::hardware; using namespace driverTestHelpers; +using namespace armnn_driver; BOOST_AUTO_TEST_CASE(Init) { @@ -31,9 +31,9 @@ BOOST_AUTO_TEST_CASE(TestCapabilities) auto driver = std::make_unique(DriverOptions(armnn::Compute::CpuRef)); ErrorStatus error; - V1_0::Capabilities cap; + neuralnetworks::V1_0::Capabilities cap; - ArmnnDriver::getCapabilities_cb cb = [&](ErrorStatus status, const V1_0::Capabilities& capabilities) + auto cb = [&](ErrorStatus status, const neuralnetworks::V1_0::Capabilities& capabilities) { error = status; cap = capabilities; diff --git a/test/UtilsTests.cpp b/test/UtilsTests.cpp index e7e6cde7..72b6d361 100644 --- a/test/UtilsTests.cpp +++ b/test/UtilsTests.cpp @@ -16,9 +16,10 @@ BOOST_AUTO_TEST_SUITE(UtilsTests) -using namespace armnn_driver; -using namespace android::nn; using namespace android; +using namespace android::nn; +using namespace android::hardware; +using namespace armnn_driver; // The following are helpers for writing unit tests for the driver. namespace @@ -28,9 +29,9 @@ struct ExportNetworkGraphFixture { public: // Setup: set the output dump directory and an empty dummy model (as only its memory address is used). - // Defaulting the output dump directory to "/sdcard" because it should exist and be writable in all deployments. + // Defaulting the output dump directory to "/data" because it should exist and be writable in all deployments. ExportNetworkGraphFixture() - : ExportNetworkGraphFixture("/sdcard") + : ExportNetworkGraphFixture("/data") {} ExportNetworkGraphFixture(const std::string& requestInputsAndOutputsDumpDir) : m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) @@ -95,7 +96,7 @@ public: } std::string m_RequestInputsAndOutputsDumpDir; - V1_0::Model m_Model; + neuralnetworks::V1_0::Model m_Model; private: std::string m_FileName; -- cgit v1.2.1