author    Matthew Sloyan <matthew.sloyan@arm.com>  2021-02-10 16:50:53 +0000
committer Matthew Sloyan <matthew.sloyan@arm.com>  2021-02-12 17:56:58 +0000
commit    0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3 (patch)
tree      6931067f0e0ff25be7d6a01d379f4b1497dff424
parent    b454c5c65efb238c130b042ace390b2bc7f0bf75 (diff)
download  armnn-0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3.tar.gz
IVGCVSW-5685 Add CpuAcc specific configuration option numberOfThreads
* Added ability to set number of threads used in CpuAcc backend
* Enabled number-of-threads option in ExecuteNetwork
* Added TfLiteDelegate ModelOptions test
* Added unsigned int type to BackendOptions.hpp

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ia576d4f45cbe5df3654bc730bb5ebd5181d82b5a
-rw-r--r-- delegate/src/test/DelegateOptionsTest.cpp             | 36
-rw-r--r-- include/armnn/BackendOptions.hpp                      | 14
-rw-r--r-- src/backends/neon/NeonBackendModelContext.cpp         | 20
-rw-r--r-- src/backends/neon/NeonBackendModelContext.hpp         |  5
-rw-r--r-- src/backends/neon/NeonWorkloadFactory.cpp             | 21
-rw-r--r-- src/backends/neon/NeonWorkloadFactory.hpp             |  4
-rw-r--r-- src/backends/neon/test/NeonOptimizedNetworkTests.cpp  | 32
-rw-r--r-- tests/ExecuteNetwork/ExecuteNetwork.cpp               |  1
-rw-r--r-- tests/ExecuteNetwork/ExecuteNetworkParams.hpp         |  1
-rw-r--r-- tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp |  6
-rw-r--r-- tests/InferenceModel.hpp                              |  5
11 files changed, 143 insertions(+), 2 deletions(-)
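
Before the per-file diffs, a minimal sketch of how a caller opts in to the new option through the public API (it mirrors the delegate test and the InferenceModel.hpp hunk below; the wrapper function name is illustrative and network construction is elided):

    #include <armnn/BackendOptions.hpp>
    #include <armnn/INetwork.hpp>

    armnn::OptimizerOptions MakeCpuAccOptions()
    {
        // Must be unsigned int: a plain integer literal would select the int
        // overload of BackendOptions::Var and the backend would ignore it.
        unsigned int numberOfThreads = 4;

        armnn::BackendOptions cpuAcc("CpuAcc",
        {
            { "FastMathEnabled", true },
            { "NumberOfThreads", numberOfThreads }
        });

        armnn::OptimizerOptions optimizerOptions;
        optimizerOptions.m_ModelOptions.push_back(cpuAcc);
        return optimizerOptions; // pass to armnn::Optimize()
    }

Values from 1 to 64 are honoured; 0 (the default) leaves the thread count to the backend.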
diff --git a/delegate/src/test/DelegateOptionsTest.cpp b/delegate/src/test/DelegateOptionsTest.cpp
index 23510c7777..6024754be8 100644
--- a/delegate/src/test/DelegateOptionsTest.cpp
+++ b/delegate/src/test/DelegateOptionsTest.cpp
@@ -150,4 +150,40 @@ TEST_CASE ("ArmnnDelegateOptimizerOptionsImport")
}
+TEST_SUITE("DelegateOptions_CpuAccTests")
+{
+
+TEST_CASE ("ArmnnDelegateModelOptions_CpuAcc_Test")
+{
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+ std::vector<int32_t> tensorShape { 1, 2, 2, 1 };
+ std::vector<float> inputData = { 1, 2, 3, 4 };
+ std::vector<float> divData = { 2, 2, 3, 4 };
+ std::vector<float> expectedResult = { 1, 2, 2, 2 };
+
+ unsigned int numberOfThreads = 2;
+
+ armnn::ModelOptions modelOptions;
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", true },
+ { "NumberOfThreads", numberOfThreads }
+ });
+ modelOptions.push_back(cpuAcc);
+
+ armnn::OptimizerOptions optimizerOptions(false, false, false, false, modelOptions);
+ armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions);
+
+ DelegateOptionTest<float>(::tflite::TensorType_FLOAT32,
+ backends,
+ tensorShape,
+ inputData,
+ inputData,
+ divData,
+ expectedResult,
+ delegateOptions);
+}
+
+}
+
} // namespace armnnDelegate
diff --git a/include/armnn/BackendOptions.hpp b/include/armnn/BackendOptions.hpp
index 4aee070866..b705f41505 100644
--- a/include/armnn/BackendOptions.hpp
+++ b/include/armnn/BackendOptions.hpp
@@ -24,6 +24,7 @@ private:
struct CheckAllowed
{
static const bool value = std::is_same<T, int>::value ||
+ std::is_same<T, unsigned int>::value ||
std::is_same<T, float>::value ||
std::is_same<T, bool>::value ||
std::is_same<T, std::string>::value ||
@@ -38,6 +39,7 @@ public:
public:
/// Constructors
explicit Var(int i) : m_Vals(i), m_Type(VarTypes::Integer) {};
+ explicit Var(unsigned int u) : m_Vals(u), m_Type(VarTypes::UnsignedInteger) {};
explicit Var(float f) : m_Vals(f), m_Type(VarTypes::Float) {};
explicit Var(bool b) : m_Vals(b), m_Type(VarTypes::Boolean) {};
explicit Var(const char* s) : m_Vals(s), m_Type(VarTypes::String) {};
@@ -107,12 +109,14 @@ public:
/// Type getters
bool IsBool() const { return m_Type == VarTypes::Boolean; }
bool IsInt() const { return m_Type == VarTypes::Integer; }
+ bool IsUnsignedInt() const { return m_Type == VarTypes::UnsignedInteger; }
bool IsFloat() const { return m_Type == VarTypes::Float; }
bool IsString() const { return m_Type == VarTypes::String; }
/// Value getters
bool AsBool() const { assert(IsBool()); return m_Vals.b; }
int AsInt() const { assert(IsInt()); return m_Vals.i; }
+ unsigned int AsUnsignedInt() const { assert(IsUnsignedInt()); return m_Vals.u; }
float AsFloat() const { assert(IsFloat()); return m_Vals.f; }
std::string AsString() const { assert(IsString()); return m_Vals.s; }
@@ -136,6 +140,10 @@ public:
{
func(m_Vals.i, other.m_Vals.i);
}
+ else if (other.IsUnsignedInt())
+ {
+ func(m_Vals.u, other.m_Vals.u);
+ }
else if (other.IsFloat())
{
func(m_Vals.f, other.m_Vals.f);
@@ -163,12 +171,14 @@ public:
Integer,
Float,
String,
+ UnsignedInteger
};
/// Union of potential type values.
union Vals
{
int i;
+ unsigned int u;
float f;
bool b;
std::string s;
@@ -177,6 +187,7 @@ public:
~Vals(){}
explicit Vals(int i) : i(i) {};
+ explicit Vals(unsigned int u) : u(u) {};
explicit Vals(float f) : f(f) {};
explicit Vals(bool b) : b(b) {};
explicit Vals(const char* s) : s(std::string(s)) {}
@@ -196,6 +207,9 @@ public:
BackendOption(std::string name, int value)
: m_Name(name), m_Value(value)
{}
+ BackendOption(std::string name, unsigned int value)
+ : m_Name(name), m_Value(value)
+ {}
BackendOption(std::string name, float value)
: m_Name(name), m_Value(value)
{}
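
A quick round-trip sketch of the new unsigned int support (BackendOption::GetValue() and the getters are the same ones the tests below use; the function name is illustrative):

    #include <armnn/BackendOptions.hpp>
    #include <cassert>

    void UnsignedIntVarRoundTrip()
    {
        unsigned int numberOfThreads = 2;
        armnn::BackendOption option("NumberOfThreads", numberOfThreads);

        // Stored as UnsignedInteger, so the int getters reject it.
        assert(option.GetValue().IsUnsignedInt());
        assert(!option.GetValue().IsInt());
        assert(option.GetValue().AsUnsignedInt() == 2u);
    }

Note the asymmetry this introduces: { "NumberOfThreads", 2 } with a plain int literal is stored as Integer and would fail an IsUnsignedInt() check downstream, which is why the tests declare the value as unsigned int first.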
diff --git a/src/backends/neon/NeonBackendModelContext.cpp b/src/backends/neon/NeonBackendModelContext.cpp
index 2be71e5ded..270592e94d 100644
--- a/src/backends/neon/NeonBackendModelContext.cpp
+++ b/src/backends/neon/NeonBackendModelContext.cpp
@@ -17,13 +17,22 @@ bool ParseBool(const armnn::BackendOptions::Var& value, bool defaultValue)
return defaultValue;
}
+unsigned int ParseUnsignedInt(const armnn::BackendOptions::Var& value, unsigned int defaultValue)
+{
+ if (value.IsUnsignedInt())
+ {
+ return value.AsUnsignedInt();
+ }
+ return defaultValue;
+}
+
} // namespace anonymous
namespace armnn
{
NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOptions)
- : m_IsFastMathEnabled(false)
+ : m_IsFastMathEnabled(false), m_NumberOfThreads(0)
{
if (!modelOptions.empty())
{
@@ -33,6 +42,10 @@ NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOption
{
m_IsFastMathEnabled |= ParseBool(value, false);
}
+ if (name == "NumberOfThreads")
+ {
+ m_NumberOfThreads |= ParseUnsignedInt(value, 0);
+ }
});
}
}
@@ -42,4 +55,9 @@ bool NeonBackendModelContext::IsFastMathEnabled() const
return m_IsFastMathEnabled;
}
+unsigned int NeonBackendModelContext::GetNumberOfThreads() const
+{
+ return m_NumberOfThreads;
+}
+
} // namespace armnn
\ No newline at end of file
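
How the backend consumes the option, sketched under the assumption that the code sits inside src/backends/neon (NeonBackendModelContext.hpp is an internal header, not part of the public include tree):

    #include "NeonBackendModelContext.hpp"

    unsigned int ResolveThreadCount()
    {
        armnn::ModelOptions modelOptions;
        modelOptions.push_back(armnn::BackendOptions("CpuAcc",
        {
            { "NumberOfThreads", 2u }
        }));

        armnn::NeonBackendModelContext context(modelOptions);
        // Returns 2 here; an absent or non-unsigned-int value falls back to 0.
        return context.GetNumberOfThreads();
    }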
diff --git a/src/backends/neon/NeonBackendModelContext.hpp b/src/backends/neon/NeonBackendModelContext.hpp
index 430ae45a6f..e736efc1d8 100644
--- a/src/backends/neon/NeonBackendModelContext.hpp
+++ b/src/backends/neon/NeonBackendModelContext.hpp
@@ -14,6 +14,8 @@ namespace armnn
/// - "FastMathEnabled"\n
/// Using the fast_math flag can lead to performance improvements in fp32 and fp16 layers but may result in\n
/// results with reduced or different precision. The fast_math flag will not have any effect on int8 performance.
+/// - "NumberOfThreads"\n
+/// Specify the number of threads used by the CpuAcc backend.
class NeonBackendModelContext : public IBackendModelContext
{
public:
@@ -21,8 +23,11 @@ public:
bool IsFastMathEnabled() const;
+ unsigned int GetNumberOfThreads() const;
+
private:
bool m_IsFastMathEnabled;
+ unsigned int m_NumberOfThreads;
};
} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 7d0942874e..28352011dc 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -51,15 +51,36 @@ const BackendId& NeonWorkloadFactory::GetBackendId() const
return s_Id;
}
+void NeonWorkloadFactory::SetNumberOfThreads()
+{
+ if (m_ModelContextPtr)
+ {
+ const unsigned int MIN_THREADS = 1;
+ const unsigned int MAX_THREADS = 64;
+
+ // Set the number of threads to use if the user has set the NumberOfThreads parameter.
+ // Apply it only if the value is valid, i.e. within the allowed range.
+ auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+ auto numberOfThreads = modelOptions->GetNumberOfThreads();
+
+ if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
+ {
+ arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
+ }
+ }
+}
+
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
: m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
+ SetNumberOfThreads();
}
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
: m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
+ SetNumberOfThreads();
}
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
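
The guard in SetNumberOfThreads, restated as a standalone sketch (the helper name is illustrative; arm_compute::Scheduler::get().set_num_threads is the same Arm Compute Library call the factory makes):

    #include <arm_compute/runtime/Scheduler.h>

    void ApplyThreadCount(unsigned int numberOfThreads)
    {
        const unsigned int MIN_THREADS = 1;
        const unsigned int MAX_THREADS = 64;

        // 0 (the default) means "let the backend decide": leave the ACL
        // scheduler's thread count untouched.
        if (numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
        {
            arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
        }
    }

Since MIN_THREADS is 1, the explicit numberOfThreads != 0 test in the factory is already implied by the lower bound.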
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 4817a06540..ee11002033 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -11,6 +11,8 @@
#include <aclCommon/BaseMemoryManager.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
+#include <arm_compute/runtime/IScheduler.h>
+
namespace armnn
{
@@ -255,6 +257,8 @@ public:
const WorkloadInfo& info) const override;
private:
+ void SetNumberOfThreads();
+
mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
};
diff --git a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
index 302711029e..85f06174c7 100644
--- a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
+++ b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
@@ -110,4 +110,36 @@ BOOST_AUTO_TEST_CASE(FastMathEnabledTestOnCpuAcc)
BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true);
}
+BOOST_AUTO_TEST_CASE(NumberOfThreadsTestOnCpuAcc)
+{
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ unsigned int numberOfThreads = 2;
+
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+ armnn::OptimizerOptions optimizerOptions;
+ armnn::BackendOptions modelOptions("CpuAcc", {{"NumberOfThreads", numberOfThreads}});
+ optimizerOptions.m_ModelOptions.push_back(modelOptions);
+
+ armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
+ *net, backends, runtime->GetDeviceSpec(), optimizerOptions);
+
+ BOOST_CHECK(optimizedNet);
+
+ auto modelOptionsOut = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetModelOptions();
+
+ BOOST_TEST(modelOptionsOut.size() == 1);
+ BOOST_TEST(modelOptionsOut[0].GetOption(0).GetName() == "NumberOfThreads");
+ BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsUnsignedInt() == numberOfThreads);
+}
+
BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 5df5dfbce7..c19f519c73 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -308,6 +308,7 @@ int MainImpl(const ExecuteNetworkParams& params,
inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
+ inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
for(const std::string& inputName: params.m_InputNames)
{
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index 56d32907b8..830270adbc 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -34,6 +34,7 @@ struct ExecuteNetworkParams
size_t m_Iterations;
std::string m_ModelFormat;
std::string m_ModelPath;
+ unsigned int m_NumberOfThreads;
std::vector<std::string> m_OutputNames;
std::vector<std::string> m_OutputTensorFiles;
std::vector<std::string> m_OutputTypes;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index a080e57d0c..73da1f1d1d 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -276,6 +276,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
"performance improvements but may result in reduced or different precision.",
cxxopts::value<bool>(m_ExNetParams.m_EnableFastMath)->default_value("false")->implicit_value("true"))
+ ("number-of-threads",
+ "Assign the number of threads used by the CpuAcc backend. "
+ "Input value must be between 1 and 64. "
+ "Default is set to 0 (Backend will decide number of threads to use).",
+ cxxopts::value<unsigned int>(m_ExNetParams.m_NumberOfThreads)->default_value("0"))
+
("save-cached-network",
"Enables saving of the cached network to a file given with the cached-network-filepath option. "
"See also --cached-network-filepath",
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 936d0bf9ea..d20bb2271f 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -99,6 +99,7 @@ struct Params
bool m_EnableFastMath;
bool m_SaveCachedNetwork;
std::string m_CachedNetworkFilePath;
+ unsigned int m_NumberOfThreads;
Params()
: m_ComputeDevices{}
@@ -113,6 +114,7 @@ struct Params
, m_EnableFastMath(false)
, m_SaveCachedNetwork(false)
, m_CachedNetworkFilePath("")
+ , m_NumberOfThreads(0)
{}
};
@@ -436,7 +438,8 @@ public:
});
armnn::BackendOptions cpuAcc("CpuAcc",
{
- { "FastMathEnabled", params.m_EnableFastMath }
+ { "FastMathEnabled", params.m_EnableFastMath },
+ { "NumberOfThreads", params.m_NumberOfThreads }
});
options.m_ModelOptions.push_back(gpuAcc);
options.m_ModelOptions.push_back(cpuAcc);