11 #include <doctest/doctest.h> 15 TEST_CASE(
"FallbackImportToCpuAcc")
17 using namespace armnn;
22 CHECK((backendObjPtr !=
nullptr));
25 if (backendIds.find(
"MockRef") == backendIds.end())
27 std::string message =
"Cannot load MockRef";
85 std::string ignoredErrorMessage;
87 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
90 std::vector<float> inputData0
92 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
94 std::vector<float> inputData1
96 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
98 std::vector<float> inputData2
100 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
103 std::vector<float> outputData(12);
105 std::vector<float> expectedOutput
107 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
125 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
128 runtime->GetProfiler(netId)->EnableProfiling(
true);
131 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
135 std::stringstream ss;
137 std::string dump = ss.str();
140 std::size_t found = dump.find(
"ImportMemGeneric");
141 CHECK(found != std::string::npos);
144 found = dump.find(
"SyncMemGeneric");
145 CHECK(found != std::string::npos);
148 found = dump.find(
"CopyMemGeneric");
149 CHECK(found == std::string::npos);
155 CHECK(outputData == expectedOutput);
158 TEST_CASE(
"FallbackPaddingCopyToCpuAcc")
160 using namespace armnn;
165 CHECK((backendObjPtr !=
nullptr));
168 if (backendIds.find(
"MockRef") == backendIds.end())
170 std::string message =
"Cannot load MockRef";
226 std::string ignoredErrorMessage;
229 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
232 std::vector<float> inputData0
234 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
236 std::vector<float> inputData1
238 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
241 std::vector<float> outputData(2);
243 std::vector<float> expectedOutput
260 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
263 runtime->GetProfiler(netId)->EnableProfiling(
true);
266 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
270 std::stringstream ss;
272 std::string dump = ss.str();
275 std::size_t found = dump.find(
"CopyMemGeneric");
276 CHECK(found != std::string::npos);
279 found = dump.find(
"SyncMemGeneric");
280 CHECK(found != std::string::npos);
283 found = dump.find(
"ImportMemGeneric");
284 CHECK(found == std::string::npos);
290 CHECK(outputData == expectedOutput);
293 TEST_CASE(
"FallbackImportFromCpuAcc")
295 using namespace armnn;
300 CHECK((backendObjPtr !=
nullptr));
303 if (backendIds.find(
"MockRef") == backendIds.end())
305 std::string message =
"Cannot load MockRef";
363 std::string ignoredErrorMessage;
366 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
369 std::vector<float> inputData0
371 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
373 std::vector<float> inputData1
375 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
377 std::vector<float> inputData2
379 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
382 std::vector<float> outputData(12);
384 std::vector<float> expectedOutput
386 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
404 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
407 runtime->GetProfiler(netId)->EnableProfiling(
true);
410 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
414 std::stringstream ss;
416 std::string dump = ss.str();
419 std::size_t found = dump.find(
"ImportMemGeneric");
420 CHECK(found != std::string::npos);
423 found = dump.find(
"SyncMemGeneric");
424 CHECK(found != std::string::npos);
427 found = dump.find(
"CopyMemGeneric");
428 CHECK(found == std::string::npos);
434 CHECK(outputData == expectedOutput);
437 TEST_CASE(
"FallbackPaddingCopyFromCpuAcc")
439 using namespace armnn;
444 CHECK((backendObjPtr !=
nullptr));
447 if (backendIds.find(
"MockRef") == backendIds.end())
449 std::string message =
"Cannot load MockRef";
505 std::string ignoredErrorMessage;
508 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
511 std::vector<float> inputData0
513 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
515 std::vector<float> inputData1
520 std::vector<float> outputData(2);
522 std::vector<float> expectedOutput
539 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
542 runtime->GetProfiler(netId)->EnableProfiling(
true);
545 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
549 std::stringstream ss;
551 std::string dump = ss.str();
554 std::size_t found = dump.find(
"CopyMemGeneric");
555 CHECK(found != std::string::npos);
558 found = dump.find(
"SyncMemGeneric");
559 CHECK(found != std::string::npos);
562 found = dump.find(
"ImportMemGeneric");
563 CHECK(found == std::string::npos);
569 CHECK(outputData == expectedOutput);
572 TEST_CASE(
"FallbackDisableImportFromCpuAcc")
574 using namespace armnn;
579 CHECK((backendObjPtr !=
nullptr));
582 if (backendIds.find(
"MockRef") == backendIds.end())
584 std::string message =
"Cannot load MockRef";
640 std::string ignoredErrorMessage;
643 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
646 std::vector<float> inputData0
648 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
650 std::vector<float> inputData1
652 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
654 std::vector<float> inputData2
656 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
659 std::vector<float> outputData(12);
661 std::vector<float> expectedOutput
663 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
681 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
684 runtime->GetProfiler(netId)->EnableProfiling(
true);
687 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
691 std::stringstream ss;
693 std::string dump = ss.str();
696 std::size_t found = dump.find(
"CopyMemGeneric");
697 CHECK(found != std::string::npos);
700 found = dump.find(
"ImportMemGeneric");
701 CHECK(found == std::string::npos);
707 CHECK(outputData == expectedOutput);
710 #if defined(ARMCOMPUTECL_ENABLED) 711 TEST_CASE(
"NeonImportEnabledFallbackToCl")
713 using namespace armnn;
777 std::string ignoredErrorMessage;
781 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
784 std::vector<float> inputData0
786 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
788 std::vector<float> inputData1
790 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
792 std::vector<float> inputData2
794 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
797 std::vector<float> outputData(16);
799 std::vector<float> expectedOutput
801 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
806 size_t totalBytes = numElements *
sizeof(float);
809 const size_t alignment = 64;
810 size_t space = totalBytes + alignment + alignment;
811 auto inputData = std::make_unique<uint8_t[]>(space);
812 void* alignedInputPtr = inputData.get();
813 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
815 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
816 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
833 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
836 runtime->GetProfiler(netId)->EnableProfiling(
true);
839 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
843 std::stringstream ss;
845 std::string dump = ss.str();
848 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
849 CHECK(found != std::string::npos);
852 found = dump.find(
"CopyMemGeneric");
853 CHECK(found != std::string::npos);
856 for(
unsigned int i = 0; i < numElements; ++i)
858 CHECK(outputData[i] == expectedOutput[i]);
860 runtime->UnloadNetwork(netId);
863 TEST_CASE(
"NeonImportDisabledFallbackToCl")
865 using namespace armnn;
928 runtime->LoadNetwork(netId, std::move(optNet));
931 std::vector<float> inputData0
933 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
935 std::vector<float> inputData1
937 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
939 std::vector<float> inputData2
941 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
944 std::vector<float> outputData(12);
946 std::vector<float> expectedOutput
948 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
966 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
969 runtime->GetProfiler(netId)->EnableProfiling(
true);
972 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
976 std::stringstream ss;
978 std::string dump = ss.str();
981 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
982 CHECK(found != std::string::npos);
985 found = dump.find(
"CopyMemGeneric");
986 CHECK(found != std::string::npos);
989 CHECK(outputData == expectedOutput);
992 TEST_CASE(
"NeonImportEnabledFallbackSubgraphToCl")
994 using namespace armnn;
1073 std::string ignoredErrorMessage;
1077 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1080 std::vector<float> inputData0
1082 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
1084 std::vector<float> inputData1
1086 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
1088 std::vector<float> inputData2
1090 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
1093 std::vector<float> outputData(4);
1095 std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
1099 size_t totalBytes = numElements *
sizeof(float);
1100 const size_t alignment = 64;
1101 size_t space = totalBytes + alignment + alignment;
1102 auto inputData = std::make_unique<uint8_t[]>(space);
1103 void* alignedInputPtr = inputData.get();
1104 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
1106 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
1107 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
1124 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1127 runtime->GetProfiler(netId)->EnableProfiling(
true);
1130 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1134 std::stringstream ss;
1136 std::string dump = ss.str();
1139 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
1140 CHECK(found != std::string::npos);
1143 found = dump.find(
"NeonPooling2dWorkload_Execute");
1144 CHECK(found != std::string::npos);
1147 found = dump.find(
"CopyMemGeneric");
1148 CHECK(found != std::string::npos);
1151 found = dump.find(
"SyncMemGeneric");
1152 CHECK(found != std::string::npos);
1155 CHECK(outputData == expectedOutput);
1156 runtime->UnloadNetwork(netId);
1159 TEST_CASE(
"NeonImportDisableFallbackSubgraphToCl")
1161 using namespace armnn;
1235 runtime->LoadNetwork(netId, std::move(optNet));
1238 std::vector<float> inputData0
1240 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1242 std::vector<float> inputData1
1244 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1246 std::vector<float> inputData2
1248 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1251 std::vector<float> outputData(2);
1253 std::vector<float> expectedOutput{ 11.0f, -1.0f };
1270 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1273 runtime->GetProfiler(netId)->EnableProfiling(
true);
1276 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1280 std::stringstream ss;
1282 std::string dump = ss.str();
1285 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
1286 CHECK(found != std::string::npos);
1289 found = dump.find(
"NeonPooling2dWorkload_Execute");
1290 CHECK(found != std::string::npos);
1293 found = dump.find(
"CopyMemGeneric");
1294 CHECK(found != std::string::npos);
1297 CHECK(outputData == expectedOutput);
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
BackendIdSet GetBackendIds() const
std::unordered_set< BackendId > BackendIdSet
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
static ProfilerManager & GetInstance()
bool CheckOrder(const armnn::Graph &graph, const armnn::Layer *first, const armnn::Layer *second)
Checks that first comes before second in the order.
virtual void BackendSelectionHint(Optional< BackendId > backend)=0
Provide a hint for the optimizer as to which backend to prefer for this layer.
uint32_t m_PoolWidth
Pooling width value.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
constexpr const char * MockImportBackendId()
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
uint32_t m_PoolHeight
Pooling height value.
TEST_SUITE("NeonFallback")
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
const BackendId & GetBackendId() const
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
CPU Execution: NEON: ArmCompute.
armnn::IBackendInternalUniquePtr CreateBackendObject(const armnn::BackendId &backendId)
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
A Pooling2dDescriptor for the Pooling2dLayer.
static INetworkPtr Create(NetworkOptions networkOptions={})
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
unsigned int GetNumElements() const