author    Matthew Sloyan <matthew.sloyan@arm.com>   2021-04-27 17:16:12 +0100
committer Matthew Sloyan <matthew.sloyan@arm.com>   2021-04-28 11:16:36 +0000
commit    c2b99a8783388ec3bd90dfed2e1b6d4f4d4bd1c8 (patch)
tree      79532141b41ae18ce344ee4c1f469ce2e3310dd1
parent    2241d18f16878ddef261eadda9a0a8f0672a60c8 (diff)
IVGCVSW-5831 Add additional options to Arm NN External Delegate
* Added enable-fast-math and number-of-threads options.
* Added save-cached-network and cached-network-filepath options.
* Added external_delegate python tests for new options.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I7cf6522a6f895cd71ed8f369d94a5113d78594f9
-rw-r--r--  delegate/python/test/test_external_delegate.py | 109
-rw-r--r--  delegate/src/armnn_external_delegate.cpp        |  53
2 files changed, 162 insertions(+), 0 deletions(-)
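
For reference, a minimal sketch of how the new cached-network options are driven from Python, mirroring the test added below. It assumes the tflite_runtime package is installed; the delegate library, model and cache-file paths are placeholders, not part of this change.

    # Sketch only: save a cached network on the first run, then reload it.
    import tflite_runtime.interpreter as tflite

    delegate_path = "/path/to/libarmnnDelegate.so"   # placeholder
    cache_file = "/tmp/cached_network.bin"           # placeholder

    # The cache file must already exist before the network can be saved to it.
    open(cache_file, 'a').close()

    # First run: compile GpuAcc kernels and save the cached network.
    saving_delegate = tflite.load_delegate(delegate_path, options={
        "backends": "GpuAcc",
        "save-cached-network": "1",
        "cached-network-filepath": cache_file})
    interpreter = tflite.Interpreter(model_path="model.tflite",   # placeholder model
                                     experimental_delegates=[saving_delegate])
    interpreter.allocate_tensors()
    interpreter.invoke()
    # The cached network is written out once the saving delegate is destroyed.

    # Later runs: reload the cached network and skip kernel compilation.
    loading_delegate = tflite.load_delegate(delegate_path, options={
        "backends": "GpuAcc",
        "cached-network-filepath": cache_file})
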
diff --git a/delegate/python/test/test_external_delegate.py b/delegate/python/test/test_external_delegate.py
index 93d373d0a1..f01a2d3928 100644
--- a/delegate/python/test/test_external_delegate.py
+++ b/delegate/python/test/test_external_delegate.py
@@ -57,6 +57,115 @@ def test_external_delegate_options_gpu_tuning(delegate_dir, test_data_folder, tm
# cleanup
os.remove(tuning_file)
+@pytest.mark.GpuAccTest
+def test_external_delegate_options_gpu_cached_network(delegate_dir, test_data_folder, tmp_path):
+
+ binary_file = os.path.join(str(tmp_path), "test_binary.bin")
+ # cleanup previous test run if necessary
+ if os.path.exists(binary_file):
+ os.remove(binary_file)
+
+ # Create blank binary file to write to.
+ open(binary_file, 'a').close()
+ assert (os.path.exists(binary_file))
+ assert (os.stat(binary_file).st_size == 0)
+
+ # Run inference to save cached network.
+ armnn_delegate = tflite.load_delegate(
+ delegate_dir,
+ options={
+ "backends": "GpuAcc",
+ "save-cached-network": "1",
+ "cached-network-filepath": binary_file,
+ "logging-severity": "info"})
+
+ run_mock_model(armnn_delegate, test_data_folder)
+
+ # destroy delegate and check if file has been saved.
+ armnn_delegate.__del__()
+ assert (os.stat(binary_file).st_size != 0)
+
+ # Create second delegate to load in binary file created.
+ armnn_delegate2 = tflite.load_delegate(
+ delegate_dir,
+ options={
+ "backends": "GpuAcc",
+ "cached-network-filepath": binary_file,
+ "logging-severity": "info"})
+
+ run_mock_model(armnn_delegate2, test_data_folder)
+
+ # cleanup
+ os.remove(binary_file)
+
+@pytest.mark.GpuAccTest
+def test_external_delegate_gpu_fastmath(delegate_dir, test_data_folder):
+ # create armnn delegate with enable-fast-math
+ # fast-math is only enabled on Conv2d layer, so use conv2d model.
+ armnn_delegate = tflite.load_delegate(delegate_dir, options = {'backends': 'GpuAcc',
+ 'enable-fast-math': '1',
+ "logging-severity": "info"})
+
+ model_file_name = 'conv2d.tflite'
+
+ inputShape = [ 1, 5, 5, 1 ]
+ outputShape = [ 1, 3, 3, 1 ]
+
+ inputValues = [ 1, 5, 2, 3, 5,
+ 8, 7, 3, 6, 3,
+ 3, 3, 9, 1, 9,
+ 4, 1, 8, 1, 3,
+ 6, 8, 1, 9, 2 ]
+
+ expectedResult = [ 28, 38, 29,
+ 96, 104, 53,
+ 31, 55, 24 ]
+
+ input = np.array(inputValues, dtype=np.float32).reshape(inputShape)
+ expected_output = np.array(expectedResult, dtype=np.float32).reshape(outputShape)
+
+ # run the inference
+ armnn_outputs = run_inference(test_data_folder, model_file_name, [input], [armnn_delegate])
+
+ # check results
+ compare_outputs(armnn_outputs, [expected_output])
+
+@pytest.mark.CpuAccTest
+def test_external_delegate_cpu_options(capfd, delegate_dir, test_data_folder):
+ # create armnn delegate with enable-fast-math and number-of-threads options
+ # fast-math is only enabled on Conv2d layer, so use conv2d model.
+ armnn_delegate = tflite.load_delegate(delegate_dir, options = {'backends': 'CpuAcc',
+ 'enable-fast-math': '1',
+ 'number-of-threads': '4',
+ "logging-severity": "info"})
+
+ model_file_name = 'conv2d.tflite'
+
+ inputShape = [ 1, 5, 5, 1 ]
+ outputShape = [ 1, 3, 3, 1 ]
+
+ inputValues = [ 1, 5, 2, 3, 5,
+ 8, 7, 3, 6, 3,
+ 3, 3, 9, 1, 9,
+ 4, 1, 8, 1, 3,
+ 6, 8, 1, 9, 2 ]
+
+ expectedResult = [ 28, 38, 29,
+ 96, 104, 53,
+ 31, 55, 24 ]
+
+ input = np.array(inputValues, dtype=np.float32).reshape(inputShape)
+ expected_output = np.array(expectedResult, dtype=np.float32).reshape(outputShape)
+
+ # run the inference
+ armnn_outputs = run_inference(test_data_folder, model_file_name, [input], [armnn_delegate])
+
+ # check results
+ compare_outputs(armnn_outputs, [expected_output])
+
+ captured = capfd.readouterr()
+ assert 'Set CPPScheduler to Linear mode, with 4 threads to use' in captured.out
+
def test_external_delegate_options_wrong_logging_level(delegate_dir):
with pytest.raises(ValueError):
tflite.load_delegate(
diff --git a/delegate/src/armnn_external_delegate.cpp b/delegate/src/armnn_external_delegate.cpp
index edf46efb98..27eaf64f73 100644
--- a/delegate/src/armnn_external_delegate.cpp
+++ b/delegate/src/armnn_external_delegate.cpp
@@ -4,6 +4,7 @@
//
#include "armnn_delegate.hpp"
#include <armnn/Logging.hpp>
+#include <armnn/utility/NumericCast.hpp>
#include <iostream>
#include <tensorflow/lite/minimal_logging.h>
@@ -54,6 +55,10 @@ std::vector<std::string> gpu_options {"gpu-tuning-level",
* 1,2 and 3 will create a tuning-file, 0 will apply the
* tunings from an existing file
*
+ * Option key: "gpu-mlgo-tuning-file" \n
+ * Possible values: [filenameString] \n
+ * Description: File name for the MLGO tuning file
+ *
* Option key: "gpu-tuning-file" \n
* Possible values: [filenameString] \n
* Description: File name for the tuning file.
@@ -62,6 +67,28 @@ std::vector<std::string> gpu_options {"gpu-tuning-level",
* Possible values: ["true"/"false"] \n
* Description: Enables GPU kernel profiling
*
+ * Option key: "save-cached-network" \n
+ * Possible values: ["true"/"false"] \n
+ * Description: Enables saving of the cached network to a file,
+ * specified with the cached-network-filepath option
+ *
+ * Option key: "cached-network-filepath" \n
+ * Possible values: [filenameString] \n
+ * Description: If non-empty, the given file is used to load/save the cached network.
+ * If save-cached-network is enabled, the cached network is saved to this file;
+ * the file must already exist for the save to succeed.
+ * If save-cached-network is not enabled, the cached network is loaded from this file,
+ * which removes the initial kernel compilation time and speeds up the first execution.
+ *
+ * Option key: "enable-fast-math" \n
+ * Possible values: ["true"/"false"] \n
+ * Description: Enables the fast_math option in backends that support it
+ *
+ * Option key: "number-of-threads" \n
+ * Possible values: ["1"-"64"] \n
+ * Description: Sets the number of threads used by the CpuAcc backend.
+ * The default is 0 (the backend decides how many threads to use).
+ *
* Option key: "reduce-fp32-to-fp16" \n
* Possible values: ["true"/"false"] \n
* Description: Reduce Fp32 data to Fp16 for faster processing
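
A corresponding sketch for the CPU-side options documented above (placeholder paths; assumes tflite_runtime). As the option handling in the next hunk shows, enable-fast-math is forwarded to both the GpuAcc and CpuAcc FastMathEnabled model options, while number-of-threads maps to the CpuAcc NumberOfThreads option only.

    # Sketch only: CpuAcc with fast math and an explicit thread count.
    import tflite_runtime.interpreter as tflite

    cpu_delegate = tflite.load_delegate("/path/to/libarmnnDelegate.so",  # placeholder
                                        options={
                                            "backends": "CpuAcc",
                                            "enable-fast-math": "1",    # FastMathEnabled on GpuAcc and CpuAcc
                                            "number-of-threads": "4"})  # CpuAcc only; 0 lets the backend decide
    interpreter = tflite.Interpreter(model_path="conv2d.tflite",         # placeholder model
                                     experimental_delegates=[cpu_delegate])
    interpreter.allocate_tensors()
    interpreter.invoke()
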
@@ -140,6 +167,32 @@ TfLiteDelegate* tflite_plugin_create_delegate(char** options_keys,
armnn::BackendOptions option("GpuAcc", {{"KernelProfilingEnabled", (*options_values[i] != '0')}});
options.AddBackendOption(option);
}
+ else if (std::string(options_keys[i]) == std::string("save-cached-network"))
+ {
+ armnn::BackendOptions option("GpuAcc", {{"SaveCachedNetwork", (*options_values[i] != '0')}});
+ optimizerOptions.m_ModelOptions.push_back(option);
+ }
+ else if (std::string(options_keys[i]) == std::string("cached-network-filepath"))
+ {
+ armnn::BackendOptions option("GpuAcc", {{"CachedNetworkFilePath", std::string(options_values[i])}});
+ optimizerOptions.m_ModelOptions.push_back(option);
+ }
+ // Process GPU & CPU backend options
+ else if (std::string(options_keys[i]) == std::string("enable-fast-math"))
+ {
+ armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", (*options_values[i] != '0')}});
+ optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);
+
+ armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", (*options_values[i] != '0')}});
+ optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);
+ }
+ // Process CPU backend options
+ else if (std::string(options_keys[i]) == std::string("number-of-threads"))
+ {
+ unsigned int numberOfThreads = armnn::numeric_cast<unsigned int>(atoi(options_values[i]));
+ armnn::BackendOptions modelOption("CpuAcc", {{"NumberOfThreads", numberOfThreads}});
+ optimizerOptions.m_ModelOptions.push_back(modelOption);
+ }
// Process reduce-fp32-to-fp16 option
else if (std::string(options_keys[i]) == std::string("reduce-fp32-to-fp16"))
{