about summary refs log tree commit diff
diff options
context:
space:
mode:
authorKeith Davis <keith.davis@arm.com>2022-10-14 15:50:33 +0100
committerKeithARM <keith.davis@arm.com>2022-10-19 10:33:40 +0000
commit15f9c68adef324cd0158cea3d021c0f6bef5eecf (patch)
tree1cd48b345d182fd19efdc40a32e2540befd8f925
parent7bbf56598010041ea46c3fa9d32604db777ee26e (diff)
downloadarmnn-15f9c68adef324cd0158cea3d021c0f6bef5eecf.tar.gz
MLCE-545 INT8 TFLite model execution abnormal
* Add functionality to print output tensors to file in tempdir
* UnitTests

Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: Idfb4c186544187db1fecdfca11c662540f645439
-rw-r--r--include/armnn/INetwork.hpp11
-rw-r--r--include/armnn/backends/WorkloadData.hpp2
-rw-r--r--include/armnnUtils/Filesystem.hpp7
-rw-r--r--src/armnn/Network.cpp6
-rw-r--r--src/armnn/NetworkUtils.cpp4
-rw-r--r--src/armnn/NetworkUtils.hpp2
-rw-r--r--src/armnn/layers/DebugLayer.cpp9
-rw-r--r--src/armnn/layers/DebugLayer.hpp4
-rw-r--r--src/armnn/optimizations/AddDebug.hpp22
-rw-r--r--src/armnnUtils/Filesystem.cpp40
-rw-r--r--src/backends/backendsCommon/test/layerTests/DebugTestImpl.cpp141
-rw-r--r--src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp48
-rw-r--r--src/backends/reference/test/RefLayerTests.cpp59
-rw-r--r--src/backends/reference/workloads/Debug.cpp110
-rw-r--r--src/backends/reference/workloads/Debug.hpp4
-rw-r--r--src/backends/reference/workloads/RefDebugWorkload.cpp2
-rw-r--r--tests/ExecuteNetwork/ArmNNExecutor.cpp1
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkParams.cpp1
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkParams.hpp1
-rw-r--r--tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp5
-rw-r--r--tests/InferenceModel.hpp3
21 files changed, 350 insertions, 132 deletions
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 0289a90e71..687f2c3e81 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -129,6 +129,7 @@ struct OptimizerOptions
OptimizerOptions()
: m_ReduceFp32ToFp16(false)
, m_Debug(false)
+ , m_DebugToFile(false)
, m_ReduceFp32ToBf16(false)
, m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly)
, m_ImportEnabled(false)
@@ -139,9 +140,10 @@ struct OptimizerOptions
{}
OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16, bool importEnabled,
- ModelOptions modelOptions = {}, bool exportEnabled = false)
+ ModelOptions modelOptions = {}, bool exportEnabled = false, bool debugToFile = false)
: m_ReduceFp32ToFp16(reduceFp32ToFp16)
, m_Debug(debug)
+ , m_DebugToFile(debugToFile)
, m_ReduceFp32ToBf16(reduceFp32ToBf16)
, m_shapeInferenceMethod(armnn::ShapeInferenceMethod::ValidateOnly)
, m_ImportEnabled(importEnabled)
@@ -159,9 +161,10 @@ struct OptimizerOptions
OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false,
ShapeInferenceMethod shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly,
bool importEnabled = false, ModelOptions modelOptions = {}, bool exportEnabled = false,
- bool allowExpandedDims = false)
+ bool debugToFile = false, bool allowExpandedDims = false)
: m_ReduceFp32ToFp16(reduceFp32ToFp16)
, m_Debug(debug)
+ , m_DebugToFile(debugToFile)
, m_ReduceFp32ToBf16(reduceFp32ToBf16)
, m_shapeInferenceMethod(shapeInferenceMethod)
, m_ImportEnabled(importEnabled)
@@ -183,6 +186,7 @@ struct OptimizerOptions
stream << "\tReduceFp32ToFp16: " << m_ReduceFp32ToFp16 << "\n";
stream << "\tReduceFp32ToBf16: " << m_ReduceFp32ToBf16 << "\n";
stream << "\tDebug: " << m_Debug << "\n";
+ stream << "\tDebug to file: " << m_DebugToFile << "\n";
stream << "\tShapeInferenceMethod: " <<
(m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly ? "ValidateOnly" : "InferAndValidate") << "\n";
stream << "\tImportEnabled: " << m_ImportEnabled << "\n";
@@ -215,6 +219,9 @@ struct OptimizerOptions
// Add debug data for easier troubleshooting
bool m_Debug;
+ // Pass debug data to separate output files for easier troubleshooting
+ bool m_DebugToFile;
+
/// Reduces all Fp32 operators in the model to Bf16 for faster processing.
/// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers
/// between layers that weren't in Fp32 in the first place or if the operator is not supported in Bf16.
diff --git a/include/armnn/backends/WorkloadData.hpp b/include/armnn/backends/WorkloadData.hpp
index 214ea7b060..bd2b3ecaa9 100644
--- a/include/armnn/backends/WorkloadData.hpp
+++ b/include/armnn/backends/WorkloadData.hpp
@@ -522,6 +522,8 @@ struct DebugQueueDescriptor : QueueDescriptor
LayerGuid m_Guid;
std::string m_LayerName;
unsigned int m_SlotIndex;
+
+ bool m_LayerOutputToFile = false;
};
struct RsqrtQueueDescriptor : QueueDescriptor
diff --git a/include/armnnUtils/Filesystem.hpp b/include/armnnUtils/Filesystem.hpp
index 0d29a7558d..00da50f4f8 100644
--- a/include/armnnUtils/Filesystem.hpp
+++ b/include/armnnUtils/Filesystem.hpp
@@ -19,9 +19,16 @@ namespace armnnUtils
namespace Filesystem
{
+using FileContents = std::string;
+
/// Returns a path to a file in the system temporary folder. If the file existed it will be deleted.
fs::path NamedTempFile(const char* fileName);
+/// Returns full path to temporary folder
+std::string CreateDirectory(std::string sPath);
+
+FileContents ReadFileContentsIntoString(const std::string path);
+
} // namespace armnnUtils
} // namespace Filesystem
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 1b1815f73d..bb6eb19fa3 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1810,10 +1810,14 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
// This must occur after all topological changes to the graph and any redirection of variables
// If the debug flag is set, then insert a DebugLayer after each layer
// Doing this after applying the backend optimizations as they might have changed some layers
- if (options.m_Debug)
+ if (options.m_Debug && !options.m_DebugToFile)
{
Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
}
+ else if (options.m_DebugToFile)
+ {
+ Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugToFileLayer()));
+ }
// Calculate the compatibility strategies for tensor handles
OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 5ff0e6c4e1..aaee4eba1a 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -242,7 +242,7 @@ std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& g
return convertLayers;
}
-std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
+std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool toFile)
{
std::vector<DebugLayer*> debugLayers;
debugLayers.reserve(layer.GetNumOutputSlots());
@@ -255,7 +255,7 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
std::to_string(outputSlotIdx);
DebugLayer* debugLayer =
- graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str());
+ graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);
// Sets output tensor info for the debug layer.
ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp
index 77dd068cb3..38e0aabaf9 100644
--- a/src/armnn/NetworkUtils.hpp
+++ b/src/armnn/NetworkUtils.hpp
@@ -27,7 +27,7 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer);
-std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer);
+std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool toFile);
bool RevertConstantWeightsToFP32(Layer* layer);
diff --git a/src/armnn/layers/DebugLayer.cpp b/src/armnn/layers/DebugLayer.cpp
index 8342c530b2..e1c8572c95 100644
--- a/src/armnn/layers/DebugLayer.cpp
+++ b/src/armnn/layers/DebugLayer.cpp
@@ -13,7 +13,13 @@ namespace armnn
{
DebugLayer::DebugLayer(const char* name)
- : Layer(1, 1, LayerType::Debug, name)
+ : Layer(1, 1, LayerType::Debug, name),
+ m_ToFile(false)
+{}
+
+DebugLayer::DebugLayer(const char* name, bool toFile)
+ : Layer(1, 1, LayerType::Debug, name),
+ m_ToFile(toFile)
{}
std::unique_ptr<IWorkload> DebugLayer::CreateWorkload(const IWorkloadFactory& factory) const
@@ -24,6 +30,7 @@ std::unique_ptr<IWorkload> DebugLayer::CreateWorkload(const IWorkloadFactory& fa
descriptor.m_Guid = prevLayer.GetGuid();
descriptor.m_LayerName = prevLayer.GetNameStr();
descriptor.m_SlotIndex = GetInputSlot(0).GetConnectedOutputSlot()->CalculateIndexOnOwner();
+ descriptor.m_LayerOutputToFile = m_ToFile;
SetAdditionalInfo(descriptor);
diff --git a/src/armnn/layers/DebugLayer.hpp b/src/armnn/layers/DebugLayer.hpp
index fe7ad5c9e5..662195d7bc 100644
--- a/src/armnn/layers/DebugLayer.hpp
+++ b/src/armnn/layers/DebugLayer.hpp
@@ -34,9 +34,13 @@ protected:
/// Constructor to create a DebugLayer.
/// @param [in] name Optional name for the layer.
DebugLayer(const char* name);
+ DebugLayer(const char* name, bool toFile);
/// Default destructor
~DebugLayer() = default;
+
+private:
+ bool m_ToFile;
};
} // namespace armnn
diff --git a/src/armnn/optimizations/AddDebug.hpp b/src/armnn/optimizations/AddDebug.hpp
index 60271b0d77..e0c79ae53f 100644
--- a/src/armnn/optimizations/AddDebug.hpp
+++ b/src/armnn/optimizations/AddDebug.hpp
@@ -22,7 +22,7 @@ public:
{
// if the inputs/outputs of this layer do not have a debug layer
// insert the debug layer after them
- InsertDebugLayerAfter(graph, layer);
+ InsertDebugLayerAfter(graph, layer, false);
}
}
@@ -31,7 +31,27 @@ protected:
~AddDebugImpl() = default;
};
+class AddDebugToFileImpl
+{
+public:
+
+ void Run(Graph& graph, Layer& layer) const
+ {
+ if (layer.GetType() != LayerType::Debug && layer.GetType() != LayerType::Output)
+ {
+ // if the inputs/outputs of this layer do not have a debug layer
+ // insert the debug layer after them
+ InsertDebugLayerAfter(graph, layer, true);
+ }
+ }
+
+protected:
+ AddDebugToFileImpl() = default;
+ ~AddDebugToFileImpl() = default;
+};
+
using InsertDebugLayer = OptimizeForType<Layer, AddDebugImpl>;
+using InsertDebugToFileLayer = OptimizeForType<Layer, AddDebugToFileImpl>;
} // namespace optimizations
} // namespace armnn
diff --git a/src/armnnUtils/Filesystem.cpp b/src/armnnUtils/Filesystem.cpp
index d917e508d5..1577d2d07c 100644
--- a/src/armnnUtils/Filesystem.cpp
+++ b/src/armnnUtils/Filesystem.cpp
@@ -5,6 +5,7 @@
#if !defined(ARMNN_DISABLE_FILESYSTEM)
#include <armnnUtils/Filesystem.hpp>
+#include "armnn/Exceptions.hpp"
namespace armnnUtils
{
@@ -32,6 +33,45 @@ fs::path NamedTempFile(const char* fileName)
return namedTempFile;
}
+/**
+ * @brief Construct a temporary directory
+ *
+ * Given a specified directory name construct a path in the
+ * system temporary directory. If the directory already exists, it is deleted,
+ * otherwise create it. This could throw filesystem_error exceptions.
+ *
+ * @param path is the path required in the temporary directory.
+ * @return path consisting of system temporary directory.
+ */
+std::string CreateDirectory(std::string path)
+{
+ fs::path tmpDir = fs::temp_directory_path();
+ mode_t permissions = 0733;
+ int result = 0;
+
+ std::string full_path = tmpDir.generic_string() + path;
+ if (fs::exists(full_path))
+ {
+ fs::remove_all(full_path);
+ }
+
+#if defined(_WIN32)
+ result = _mkdir(full_path.c_str()); // can be used on Windows
+ armnn::ConditionalThrow<armnn::RuntimeException>((result == 0), "Was unable to create temporary directory");
+#else
+ result = mkdir(full_path.c_str(), permissions);
+ armnn::ConditionalThrow<armnn::RuntimeException>((result == 0), "Was unable to create temporary directory");
+#endif
+
+ return full_path + "/";
+}
+
+FileContents ReadFileContentsIntoString(const std::string path) {
+ std::ifstream input_file(path);
+ armnn::ConditionalThrow<armnn::RuntimeException>((input_file.is_open()), "Could not read file contents");
+ return FileContents((std::istreambuf_iterator<char>(input_file)), std::istreambuf_iterator<char>());
+}
+
} // namespace armnnUtils
} // namespace Filesystem
diff --git a/src/backends/backendsCommon/test/layerTests/DebugTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/DebugTestImpl.cpp
index 9226f215aa..fa9a825313 100644
--- a/src/backends/backendsCommon/test/layerTests/DebugTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/DebugTestImpl.cpp
@@ -6,15 +6,16 @@
#include "DebugTestImpl.hpp"
#include <armnnUtils/QuantizeHelper.hpp>
+#include <armnnUtils/Filesystem.hpp>
#include <ResolveType.hpp>
-
#include <armnnTestUtils/TensorCopyUtils.hpp>
#include <armnnTestUtils/WorkloadTestUtils.hpp>
#include <armnnTestUtils/TensorHelpers.hpp>
#include <doctest/doctest.h>
+#include <armnnUtils/Filesystem.hpp>
namespace
{
@@ -29,6 +30,8 @@ LayerTestResult<T, Dim> DebugTestImpl(
std::vector<float>& outputExpectedData,
armnn::DebugQueueDescriptor descriptor,
const std::string expectedStringOutput,
+ const std::string& layerName,
+ bool toFile,
const float qScale = 1.0f,
const int32_t qOffset = 0)
{
@@ -65,15 +68,27 @@ LayerTestResult<T, Dim> DebugTestImpl(
CopyDataToITensorHandle(inputHandle.get(), input.data());
- std::ostringstream oss;
- std::streambuf* coutStreambuf = std::cout.rdbuf();
- std::cout.rdbuf(oss.rdbuf());
+ if (toFile)
+ {
+ fs::path tmpDir = fs::temp_directory_path();
+ std::string full_path = tmpDir.generic_string() + "/ArmNNIntermediateLayerOutputs/" + layerName + ".numpy";
- ExecuteWorkload(*workload, memoryManager);
+ ExecuteWorkload(*workload, memoryManager);
- std::cout.rdbuf(coutStreambuf);
+ armnnUtils::Filesystem::FileContents output = armnnUtils::Filesystem::ReadFileContentsIntoString(full_path);
+ CHECK((output == expectedStringOutput));
+ }
+ else
+ {
+ std::ostringstream oss;
+ std::streambuf* coutStreambuf = std::cout.rdbuf();
+ std::cout.rdbuf(oss.rdbuf());
- CHECK(oss.str() == expectedStringOutput);
+ ExecuteWorkload(*workload, memoryManager);
+
+ std::cout.rdbuf(coutStreambuf);
+ CHECK(oss.str() == expectedStringOutput);
+ }
CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
@@ -86,7 +101,8 @@ LayerTestResult<T, Dim> DebugTestImpl(
template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Debug4dTest(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
armnn::TensorInfo inputTensorInfo;
armnn::TensorInfo outputTensorInfo;
@@ -98,6 +114,7 @@ LayerTestResult<T, 4> Debug4dTest(
desc.m_Guid = 1;
desc.m_LayerName = "TestOutput";
desc.m_SlotIndex = 0;
+ desc.m_LayerOutputToFile = toFile;
inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
@@ -133,13 +150,16 @@ LayerTestResult<T, 4> Debug4dTest(
input,
outputExpected,
desc,
- expectedStringOutput);
+ expectedStringOutput,
+ desc.m_LayerName,
+ toFile);
}
template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 3> Debug3dTest(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
armnn::TensorInfo inputTensorInfo;
armnn::TensorInfo outputTensorInfo;
@@ -151,6 +171,7 @@ LayerTestResult<T, 3> Debug3dTest(
desc.m_Guid = 1;
desc.m_LayerName = "TestOutput";
desc.m_SlotIndex = 0;
+ desc.m_LayerOutputToFile = toFile;
inputTensorInfo = armnn::TensorInfo(3, inputShape, ArmnnType);
outputTensorInfo = armnn::TensorInfo(3, outputShape, ArmnnType);
@@ -184,13 +205,16 @@ LayerTestResult<T, 3> Debug3dTest(
input,
outputExpected,
desc,
- expectedStringOutput);
+ expectedStringOutput,
+ desc.m_LayerName,
+ toFile);
}
template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 2> Debug2dTest(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
armnn::TensorInfo inputTensorInfo;
armnn::TensorInfo outputTensorInfo;
@@ -202,6 +226,7 @@ LayerTestResult<T, 2> Debug2dTest(
desc.m_Guid = 1;
desc.m_LayerName = "TestOutput";
desc.m_SlotIndex = 0;
+ desc.m_LayerOutputToFile = toFile;
inputTensorInfo = armnn::TensorInfo(2, inputShape, ArmnnType);
outputTensorInfo = armnn::TensorInfo(2, outputShape, ArmnnType);
@@ -233,13 +258,16 @@ LayerTestResult<T, 2> Debug2dTest(
input,
outputExpected,
desc,
- expectedStringOutput);
+ expectedStringOutput,
+ desc.m_LayerName,
+ toFile);
}
template <armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 1> Debug1dTest(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
armnn::TensorInfo inputTensorInfo;
armnn::TensorInfo outputTensorInfo;
@@ -251,6 +279,7 @@ LayerTestResult<T, 1> Debug1dTest(
desc.m_Guid = 1;
desc.m_LayerName = "TestOutput";
desc.m_SlotIndex = 0;
+ desc.m_LayerOutputToFile = toFile;
inputTensorInfo = armnn::TensorInfo(1, inputShape, ArmnnType);
outputTensorInfo = armnn::TensorInfo(1, outputShape, ArmnnType);
@@ -280,119 +309,137 @@ LayerTestResult<T, 1> Debug1dTest(
input,
outputExpected,
desc,
- expectedStringOutput);
+ expectedStringOutput,
+ desc.m_LayerName,
+ toFile);
}
} // anonymous namespace
LayerTestResult<float, 4> Debug4dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug4dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+ return Debug4dTest<armnn::DataType::Float32>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<float, 3> Debug3dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug3dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+ return Debug3dTest<armnn::DataType::Float32>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<float, 2> Debug2dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug2dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+ return Debug2dTest<armnn::DataType::Float32>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<float, 1> Debug1dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug1dTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
+ return Debug1dTest<armnn::DataType::Float32>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<armnn::BFloat16, 4> Debug4dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug4dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager);
+ return Debug4dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<armnn::BFloat16, 3> Debug3dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug3dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager);
+ return Debug3dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<armnn::BFloat16, 2> Debug2dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug2dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager);
+ return Debug2dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<armnn::BFloat16, 1> Debug1dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug1dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager);
+ return Debug1dTest<armnn::DataType::BFloat16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<uint8_t, 4> Debug4dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug4dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager);
+ return Debug4dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<uint8_t, 3> Debug3dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug3dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager);
+ return Debug3dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<uint8_t, 2> Debug2dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug2dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager);
+ return Debug2dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<uint8_t, 1> Debug1dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug1dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager);
+ return Debug1dTest<armnn::DataType::QAsymmU8>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<int16_t, 4> Debug4dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug4dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager);
+ return Debug4dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<int16_t, 3> Debug3dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug3dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager);
+ return Debug3dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<int16_t, 2> Debug2dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug2dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager);
+ return Debug2dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager, toFile);
}
LayerTestResult<int16_t, 1> Debug1dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile = false)
{
- return Debug1dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager);
+ return Debug1dTest<armnn::DataType::QSymmS16>(workloadFactory, memoryManager, toFile);
}
diff --git a/src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp
index ac068e8939..250c658cbf 100644
--- a/src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/DebugTestImpl.hpp
@@ -14,64 +14,80 @@
LayerTestResult<float, 4> Debug4dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<float, 3> Debug3dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<float, 2> Debug2dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<float, 1> Debug1dFloat32Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<armnn::BFloat16, 4> Debug4dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<armnn::BFloat16, 3> Debug3dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<armnn::BFloat16, 2> Debug2dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<armnn::BFloat16, 1> Debug1dBFloat16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<uint8_t, 4> Debug4dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<uint8_t, 3> Debug3dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<uint8_t, 2> Debug2dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<uint8_t, 1> Debug1dUint8Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<int16_t, 4> Debug4dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<int16_t, 3> Debug3dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<int16_t, 2> Debug2dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
LayerTestResult<int16_t, 1> Debug1dInt16Test(
armnn::IWorkloadFactory& workloadFactory,
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ bool toFile);
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index ae40333658..7375847602 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -2250,25 +2250,46 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(StridedSlice2dInt16, StridedSlice2dInt16Test)
ARMNN_AUTO_TEST_CASE_WITH_THF(StridedSlice2dReverseInt16, StridedSlice2dReverseInt16Test)
// Debug
-ARMNN_AUTO_TEST_CASE(Debug4dFloat32, Debug4dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Debug3dFloat32, Debug3dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Debug2dFloat32, Debug2dFloat32Test)
-ARMNN_AUTO_TEST_CASE(Debug1dFloat32, Debug1dFloat32Test)
-
-ARMNN_AUTO_TEST_CASE(Debug4dBFloat16, Debug4dBFloat16Test)
-ARMNN_AUTO_TEST_CASE(Debug3dBFloat16, Debug3dBFloat16Test)
-ARMNN_AUTO_TEST_CASE(Debug2dBFloat16, Debug2dBFloat16Test)
-ARMNN_AUTO_TEST_CASE(Debug1dBFloat16, Debug1dBFloat16Test)
-
-ARMNN_AUTO_TEST_CASE(Debug4dUint8, Debug4dUint8Test)
-ARMNN_AUTO_TEST_CASE(Debug3dUint8, Debug3dUint8Test)
-ARMNN_AUTO_TEST_CASE(Debug2dUint8, Debug2dUint8Test)
-ARMNN_AUTO_TEST_CASE(Debug1dUint8, Debug1dUint8Test)
-
-ARMNN_AUTO_TEST_CASE(Debug4dQSymm16, Debug4dInt16Test)
-ARMNN_AUTO_TEST_CASE(Debug3dQSymm16, Debug3dInt16Test)
-ARMNN_AUTO_TEST_CASE(Debug2dQSymm16, Debug2dInt16Test)
-ARMNN_AUTO_TEST_CASE(Debug1dQSymm16, Debug1dInt16Test)
+ARMNN_AUTO_TEST_CASE(Debug4dFloat32, Debug4dFloat32Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug3dFloat32, Debug3dFloat32Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug2dFloat32, Debug2dFloat32Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug1dFloat32, Debug1dFloat32Test, /*toFile*/ false)
+
+ARMNN_AUTO_TEST_CASE(Debug4dBFloat16, Debug4dBFloat16Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug3dBFloat16, Debug3dBFloat16Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug2dBFloat16, Debug2dBFloat16Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug1dBFloat16, Debug1dBFloat16Test, /*toFile*/ false)
+
+ARMNN_AUTO_TEST_CASE(Debug4dUint8, Debug4dUint8Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug3dUint8, Debug3dUint8Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug2dUint8, Debug2dUint8Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug1dUint8, Debug1dUint8Test, /*toFile*/ false)
+
+ARMNN_AUTO_TEST_CASE(Debug4dQSymm16, Debug4dInt16Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug3dQSymm16, Debug3dInt16Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug2dQSymm16, Debug2dInt16Test, /*toFile*/ false)
+ARMNN_AUTO_TEST_CASE(Debug1dQSymm16, Debug1dInt16Test, /*toFile*/ false)
+
+// Debug To File
+ARMNN_AUTO_TEST_CASE(DebugToFile4dFloat32, Debug4dFloat32Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile3dFloat32, Debug3dFloat32Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile2dFloat32, Debug2dFloat32Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile1dFloat32, Debug1dFloat32Test, /*toFile*/ true)
+
+ARMNN_AUTO_TEST_CASE(DebugToFile4dBFloat16, Debug4dBFloat16Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile3dBFloat16, Debug3dBFloat16Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile2dBFloat16, Debug2dBFloat16Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile1dBFloat16, Debug1dBFloat16Test, /*toFile*/ true)
+
+ARMNN_AUTO_TEST_CASE(DebugToFile4dUint8, Debug4dUint8Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile3dUint8, Debug3dUint8Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile2dUint8, Debug2dUint8Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile1dUint8, Debug1dUint8Test, /*toFile*/ true)
+
+ARMNN_AUTO_TEST_CASE(DebugToFile4dQSymm16, Debug4dInt16Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile3dQSymm16, Debug3dInt16Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile2dQSymm16, Debug2dInt16Test, /*toFile*/ true)
+ARMNN_AUTO_TEST_CASE(DebugToFile1dQSymm16, Debug1dInt16Test, /*toFile*/ true)
// Gather
ARMNN_AUTO_TEST_CASE_WITH_THF(Gather1dParamsFloat32, Gather1dParamsFloat32Test)
diff --git a/src/backends/reference/workloads/Debug.cpp b/src/backends/reference/workloads/Debug.cpp
index 24000d45e6..fdadfef590 100644
--- a/src/backends/reference/workloads/Debug.cpp
+++ b/src/backends/reference/workloads/Debug.cpp
@@ -5,22 +5,27 @@
#include "Debug.hpp"
#include <common/include/ProfilingGuid.hpp>
+#include <armnnUtils/Filesystem.hpp>
#include <BFloat16.hpp>
#include <Half.hpp>
#include <algorithm>
#include <iostream>
+#include <iosfwd>
+#include <fstream>
+#include <sys/stat.h>
namespace armnn
{
-template <typename T>
-void Debug(const TensorInfo& inputInfo,
- const T* inputData,
- LayerGuid guid,
- const std::string& layerName,
- unsigned int slotIndex)
+template<typename T>
+void PrintOutput(const TensorInfo& inputInfo,
+ const T* inputData,
+ LayerGuid guid,
+ const std::string& layerName,
+ unsigned int slotIndex,
+ std::ostream& os)
{
const unsigned int numDims = inputInfo.GetNumDimensions();
const unsigned int numElements = inputInfo.GetNumElements();
@@ -34,30 +39,30 @@ void Debug(const TensorInfo& inputInfo,
strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
}
- std::cout << "{ ";
- std::cout << "\"layerGuid\": " << guid << ", ";
- std::cout << "\"layerName\": \"" << layerName << "\", ";
- std::cout << "\"outputSlot\": " << slotIndex << ", ";
- std::cout << "\"shape\": ";
+ os << "{ ";
+ os << "\"layerGuid\": " << guid << ", ";
+ os << "\"layerName\": \"" << layerName << "\", ";
+ os << "\"outputSlot\": " << slotIndex << ", ";
+ os << "\"shape\": ";
- std::cout << "[";
+ os << "[";
for (unsigned int i = 0; i < numDims; i++)
{
- std::cout << inputShape[i];
+ os << inputShape[i];
if (i != numDims - 1)
{
- std::cout << ", ";
+ os << ", ";
}
}
- std::cout << "], ";
+ os << "], ";
- std::cout << "\"min\": "
- << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
+ os << "\"min\": "
+ << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
- std::cout << "\"max\": "
- << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
+ os << "\"max\": "
+ << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
- std::cout << "\"data\": ";
+ os << "\"data\": ";
for (unsigned int i = 0; i < numElements; i++)
{
@@ -65,69 +70,96 @@ void Debug(const TensorInfo& inputInfo,
{
if (i % strides[j] == 0)
{
- std::cout << "[" ;
+ os << "[";
}
}
- std::cout << static_cast<float>(inputData[i]);
+ os << static_cast<float>(inputData[i]);
for (unsigned int j = 0; j < numDims; j++)
{
- if ((i+1) % strides[j] == 0)
+ if ((i + 1) % strides[j] == 0)
{
- std::cout << "]" ;
+ os << "]";
}
}
if (i != numElements - 1)
{
- std::cout << ", ";
+ os << ", ";
}
}
- std::cout << " }" << std::endl;
+ os << " }" << std::endl;
+}
+
+template<typename T>
+void Debug(const TensorInfo& inputInfo,
+ const T* inputData,
+ LayerGuid guid,
+ const std::string& layerName,
+ unsigned int slotIndex,
+ bool outputsToFile)
+{
+ if (outputsToFile)
+ {
+ auto rootPathToFile = armnnUtils::Filesystem::CreateDirectory("/ArmNNIntermediateLayerOutputs");
+ std::ofstream out(rootPathToFile + layerName + ".numpy");
+ PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, out);
+ }
+ else
+ {
+ PrintOutput<T>(inputInfo, inputData, guid, layerName, slotIndex, std::cout);
+ }
}
template void Debug<BFloat16>(const TensorInfo& inputInfo,
- const BFloat16* inputData,
- LayerGuid guid,
- const std::string& layerName,
- unsigned int slotIndex);
+ const BFloat16* inputData,
+ LayerGuid guid,
+ const std::string& layerName,
+ unsigned int slotIndex,
+ bool outputsToFile);
template void Debug<Half>(const TensorInfo& inputInfo,
const Half* inputData,
LayerGuid guid,
const std::string& layerName,
- unsigned int slotIndex);
+ unsigned int slotIndex,
+ bool outputsToFile);
template void Debug<float>(const TensorInfo& inputInfo,
const float* inputData,
LayerGuid guid,
const std::string& layerName,
- unsigned int slotIndex);
+ unsigned int slotIndex,
+ bool outputsToFile);
template void Debug<uint8_t>(const TensorInfo& inputInfo,
const uint8_t* inputData,
LayerGuid guid,
const std::string& layerName,
- unsigned int slotIndex);
+ unsigned int slotIndex,
+ bool outputsToFile);
template void Debug<int8_t>(const TensorInfo& inputInfo,
- const int8_t* inputData,
- LayerGuid guid,
- const std::string& layerName,
- unsigned int slotIndex);
+ const int8_t* inputData,
+ LayerGuid guid,
+ const std::string& layerName,
+ unsigned int slotIndex,
+ bool outputsToFile);
template void Debug<int16_t>(const TensorInfo& inputInfo,
const int16_t* inputData,
LayerGuid guid,
const std::string& layerName,
- unsigned int slotIndex);
+ unsigned int slotIndex,
+ bool outputsToFile);
template void Debug<int32_t>(const TensorInfo& inputInfo,
const int32_t* inputData,
LayerGuid guid,
const std::string& layerName,
- unsigned int slotIndex);
+ unsigned int slotIndex,
+ bool outputsToFile);
} // namespace armnn
diff --git a/src/backends/reference/workloads/Debug.hpp b/src/backends/reference/workloads/Debug.hpp
index 3f9920c543..a8802d1524 100644
--- a/src/backends/reference/workloads/Debug.hpp
+++ b/src/backends/reference/workloads/Debug.hpp
@@ -8,12 +8,12 @@
namespace armnn
{
-
template <typename T>
void Debug(const TensorInfo& inputInfo,
const T* inputData,
LayerGuid guid,
const std::string& layerName,
- unsigned int slotIndex);
+ unsigned int slotIndex,
+ bool outputsToFile);
} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp
index 48b519f809..db67b3a782 100644
--- a/src/backends/reference/workloads/RefDebugWorkload.cpp
+++ b/src/backends/reference/workloads/RefDebugWorkload.cpp
@@ -45,7 +45,7 @@ void RefDebugWorkload<DataType>::Execute(std::vector<ITensorHandle*> inputs) con
}
else
{
- Debug(inputInfo, inputData, m_Data.m_Guid, m_Data.m_LayerName, m_Data.m_SlotIndex);
+ Debug(inputInfo, inputData, m_Data.m_Guid, m_Data.m_LayerName, m_Data.m_SlotIndex, m_Data.m_LayerOutputToFile);
}
std::memcpy(outputData, inputData, inputInfo.GetNumElements()*sizeof(T));
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
index 797c09a1b2..330a239763 100644
--- a/tests/ExecuteNetwork/ArmNNExecutor.cpp
+++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp
@@ -510,6 +510,7 @@ armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* netw
options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
options.m_Debug = m_Params.m_PrintIntermediate;
+ options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
armnn::ShapeInferenceMethod::InferAndValidate :
armnn::ShapeInferenceMethod::ValidateOnly;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
index 17c08717e4..155a4c4a8b 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
@@ -126,6 +126,7 @@ armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const
options.m_ReduceFp32ToFp16 = m_EnableFp16TurboMode;
options.m_ReduceFp32ToBf16 = m_EnableBf16TurboMode;
options.m_Debug = m_PrintIntermediate;
+ options.m_DebugToFile = m_PrintIntermediateOutputsToFile;
options.m_ProfilingEnabled = m_EnableProfiling;
delegateOptions.SetInternalProfilingParams(m_EnableProfiling, armnn::ProfilingDetailsMethod::DetailsWithEvents);
options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index e60e3b8877..020dbdcced 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -50,6 +50,7 @@ struct ExecuteNetworkParams
std::vector<std::string> m_OutputTensorFiles;
bool m_ParseUnsupported = false;
bool m_PrintIntermediate;
+ bool m_PrintIntermediateOutputsToFile;
bool m_DontPrintOutputs;
bool m_QuantizeInput;
bool m_SaveCachedNetwork;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index de7bc051c7..5f19a1498c 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -289,6 +289,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
cxxopts::value<bool>(m_ExNetParams.m_PrintIntermediate)->default_value("false")
->implicit_value("true"))
+ ("F,print-intermediate-layers-to-file",
+ "If this option is enabled, the output of every graph layer will be printed within separate files.",
+ cxxopts::value<bool>(m_ExNetParams.m_PrintIntermediateOutputsToFile)->default_value("false")
+ ->implicit_value("true"))
+
("parse-unsupported",
"Add unsupported operators as stand-in layers (where supported by parser)",
cxxopts::value<bool>(m_ExNetParams.m_ParseUnsupported)->default_value("false")->implicit_value("true"))
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 268f60301c..d837fc1fcf 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -68,6 +68,7 @@ struct Params
bool m_EnableFp16TurboMode;
bool m_EnableBf16TurboMode;
bool m_PrintIntermediateLayers;
+ bool m_PrintIntermediateLayersToFile;
bool m_ParseUnsupported;
bool m_InferOutputShape;
bool m_EnableFastMath;
@@ -91,6 +92,7 @@ struct Params
, m_EnableFp16TurboMode(false)
, m_EnableBf16TurboMode(false)
, m_PrintIntermediateLayers(false)
+ , m_PrintIntermediateLayersToFile(false)
, m_ParseUnsupported(false)
, m_InferOutputShape(false)
, m_EnableFastMath(false)
@@ -452,6 +454,7 @@ public:
options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
options.m_Debug = params.m_PrintIntermediateLayers;
+ options.m_DebugToFile = params.m_PrintIntermediateLayersToFile;
options.m_shapeInferenceMethod = params.m_InferOutputShape ?
armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly;
options.m_ProfilingEnabled = m_EnableProfiling;