// NOTE(review): this file is a garbled extraction of a Doxygen source page.
// The leading integers on each line are the ORIGINAL file's line numbers,
// statements are split mid-expression, and large spans are missing (e.g.
// original lines 28-84). Code is left byte-identical; only comments added.
//
// Test fragment: "FallbackImportToCpuAcc" — runs a network where a layer
// falls back to the "MockRef" backend and, per the profiler-dump checks
// below, expects imported (zero-copy) memory to be used: ImportMemGeneric
// and SyncMemGeneric must appear in the dump, CopyMemGeneric must not.
11 #include <doctest/doctest.h> 15 TEST_CASE(
"FallbackImportToCpuAcc")
17 using namespace armnn;
// Sanity check that the backend object was created (presumably via
// CreateBackendObject — construction not visible in this extraction).
22 CHECK((backendObjPtr !=
nullptr));
// Bail out with a message when the MockRef backend is not registered.
25 if (backendIds.find(
"MockRef") == backendIds.end())
27 std::string message =
"Cannot load MockRef";
// [original lines 28-84 missing: network build + Optimize() presumably here]
85 std::string ignoredErrorMessage;
87 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: three 12-element float inputs, one 12-element output.
90 std::vector<float> inputData0
92 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
94 std::vector<float> inputData1
96 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
98 std::vector<float> inputData2
100 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
103 std::vector<float> outputData(12);
105 std::vector<float> expectedOutput
107 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
// Output tensor bound to binding id 0 (input-tensor setup not visible here).
118 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so the memory-strategy workloads show up in the dump.
121 runtime->GetProfiler(netId)->EnableProfiling(
true);
124 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
// Capture the profiler output (Print call into ss not visible here).
128 std::stringstream ss;
130 std::string dump = ss.str();
// Import path must have been taken: ImportMemGeneric + SyncMemGeneric
// present, CopyMemGeneric absent.
133 std::size_t found = dump.find(
"ImportMemGeneric");
134 CHECK(found != std::string::npos);
137 found = dump.find(
"SyncMemGeneric");
138 CHECK(found != std::string::npos);
141 found = dump.find(
"CopyMemGeneric");
142 CHECK(found == std::string::npos);
// Numerical result check.
148 CHECK(outputData == expectedOutput);
// Test fragment: "FallbackPaddingCopyToCpuAcc" — garbled extraction, code
// kept byte-identical, comments only. Per the dump checks below, this
// variant expects the fallback boundary to use a COPY (CopyMemGeneric +
// SyncMemGeneric present) rather than import (ImportMemGeneric absent) —
// presumably because padding makes the tensor non-importable; the network
// construction that would confirm this is missing from the extraction.
151 TEST_CASE(
"FallbackPaddingCopyToCpuAcc")
153 using namespace armnn;
// Backend object creation sanity check (construction not visible here).
158 CHECK((backendObjPtr !=
nullptr));
// Bail out with a message when the MockRef backend is not registered.
161 if (backendIds.find(
"MockRef") == backendIds.end())
163 std::string message =
"Cannot load MockRef";
// [original lines 164-218 missing from this extraction]
219 std::string ignoredErrorMessage;
222 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: two 12-element float inputs; 2-element output (pooled result?
// — cannot tell from here, the layer setup is missing).
225 std::vector<float> inputData0
227 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
229 std::vector<float> inputData1
231 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
234 std::vector<float> outputData(2);
236 std::vector<float> expectedOutput
// Output tensor bound to binding id 0.
248 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so memory-strategy workloads appear in the dump.
251 runtime->GetProfiler(netId)->EnableProfiling(
true);
254 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
258 std::stringstream ss;
260 std::string dump = ss.str();
// Copy path expected: CopyMemGeneric + SyncMemGeneric present,
// ImportMemGeneric absent.
263 std::size_t found = dump.find(
"CopyMemGeneric");
264 CHECK(found != std::string::npos);
267 found = dump.find(
"SyncMemGeneric");
268 CHECK(found != std::string::npos);
271 found = dump.find(
"ImportMemGeneric");
272 CHECK(found == std::string::npos);
// Numerical result check (expectedOutput values missing from extraction).
278 CHECK(outputData == expectedOutput);
// Test fragment: "FallbackImportFromCpuAcc" — garbled extraction, code kept
// byte-identical, comments only. Mirror of the "...ToCpuAcc" import case:
// the dump checks expect the import path (ImportMemGeneric + SyncMemGeneric
// present, CopyMemGeneric absent) when data flows FROM the CpuAcc backend
// across the fallback boundary.
281 TEST_CASE(
"FallbackImportFromCpuAcc")
283 using namespace armnn;
// Backend object creation sanity check.
288 CHECK((backendObjPtr !=
nullptr));
// Bail out with a message when the MockRef backend is not registered.
291 if (backendIds.find(
"MockRef") == backendIds.end())
293 std::string message =
"Cannot load MockRef";
// [original lines 294-350 missing from this extraction]
351 std::string ignoredErrorMessage;
354 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: three 12-element float inputs, one 12-element output.
357 std::vector<float> inputData0
359 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
361 std::vector<float> inputData1
363 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
365 std::vector<float> inputData2
367 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
370 std::vector<float> outputData(12);
372 std::vector<float> expectedOutput
374 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
// Output tensor bound to binding id 0.
385 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so memory-strategy workloads appear in the dump.
388 runtime->GetProfiler(netId)->EnableProfiling(
true);
391 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
395 std::stringstream ss;
397 std::string dump = ss.str();
// Import path expected: ImportMemGeneric + SyncMemGeneric present,
// CopyMemGeneric absent.
400 std::size_t found = dump.find(
"ImportMemGeneric");
401 CHECK(found != std::string::npos);
404 found = dump.find(
"SyncMemGeneric");
405 CHECK(found != std::string::npos);
408 found = dump.find(
"CopyMemGeneric");
409 CHECK(found == std::string::npos);
// Numerical result check.
415 CHECK(outputData == expectedOutput);
// Test fragment: "FallbackPaddingCopyFromCpuAcc" — garbled extraction, code
// kept byte-identical, comments only. Mirror of "FallbackPaddingCopyToCpuAcc":
// the dump checks expect a copy at the fallback boundary (CopyMemGeneric +
// SyncMemGeneric present, ImportMemGeneric absent).
418 TEST_CASE(
"FallbackPaddingCopyFromCpuAcc")
420 using namespace armnn;
// Backend object creation sanity check.
425 CHECK((backendObjPtr !=
nullptr));
// Bail out with a message when the MockRef backend is not registered.
428 if (backendIds.find(
"MockRef") == backendIds.end())
430 std::string message =
"Cannot load MockRef";
// [original lines 431-485 missing from this extraction]
486 std::string ignoredErrorMessage;
489 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: inputData1 values and expectedOutput values are missing from
// the extraction; outputData is 2 elements.
492 std::vector<float> inputData0
494 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
496 std::vector<float> inputData1
501 std::vector<float> outputData(2);
503 std::vector<float> expectedOutput
// Output tensor bound to binding id 0.
515 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so memory-strategy workloads appear in the dump.
518 runtime->GetProfiler(netId)->EnableProfiling(
true);
521 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
525 std::stringstream ss;
527 std::string dump = ss.str();
// Copy path expected: CopyMemGeneric + SyncMemGeneric present,
// ImportMemGeneric absent.
530 std::size_t found = dump.find(
"CopyMemGeneric");
531 CHECK(found != std::string::npos);
534 found = dump.find(
"SyncMemGeneric");
535 CHECK(found != std::string::npos);
538 found = dump.find(
"ImportMemGeneric");
539 CHECK(found == std::string::npos);
// Numerical result check.
545 CHECK(outputData == expectedOutput);
// Test fragment: "FallbackDisableImportFromCpuAcc" — garbled extraction,
// code kept byte-identical, comments only. With import disabled (presumably
// via networkProperties — setup missing here) the boundary must use a copy:
// CopyMemGeneric present, ImportMemGeneric absent. Note: unlike the other
// MockRef tests, no SyncMemGeneric check appears in this fragment.
548 TEST_CASE(
"FallbackDisableImportFromCpuAcc")
550 using namespace armnn;
// Backend object creation sanity check.
555 CHECK((backendObjPtr !=
nullptr));
// Bail out with a message when the MockRef backend is not registered.
558 if (backendIds.find(
"MockRef") == backendIds.end())
560 std::string message =
"Cannot load MockRef";
// [original lines 561-615 missing from this extraction]
616 std::string ignoredErrorMessage;
619 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: same data as "FallbackImportFromCpuAcc" above.
622 std::vector<float> inputData0
624 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
626 std::vector<float> inputData1
628 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
630 std::vector<float> inputData2
632 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
635 std::vector<float> outputData(12);
637 std::vector<float> expectedOutput
639 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
// Output tensor bound to binding id 0.
650 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so memory-strategy workloads appear in the dump.
653 runtime->GetProfiler(netId)->EnableProfiling(
true);
656 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
660 std::stringstream ss;
662 std::string dump = ss.str();
// Copy path expected; import must NOT have been used.
665 std::size_t found = dump.find(
"CopyMemGeneric");
666 CHECK(found != std::string::npos);
669 found = dump.find(
"ImportMemGeneric");
670 CHECK(found == std::string::npos);
// Numerical result check.
676 CHECK(outputData == expectedOutput);
// Test fragment: "NeonImportEnabledFallbackToCl" — garbled extraction, code
// kept byte-identical, comments only. Compiled only when the CL (OpenCL)
// backend is available. A subtraction falls back to GpuAcc (the dump must
// contain ClSubtractionWorkload_Execute) and, since CL cannot import the
// CPU buffer here, a copy is still expected (CopyMemGeneric present).
679 #if defined(ARMCOMPUTECL_ENABLED) 680 TEST_CASE(
"NeonImportEnabledFallbackToCl")
682 using namespace armnn;
// [original lines 683-745 missing: runtime/network/backend setup]
746 std::string ignoredErrorMessage;
750 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: three 16-element float inputs, one 16-element output.
753 std::vector<float> inputData0
755 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
757 std::vector<float> inputData1
759 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
761 std::vector<float> inputData2
763 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
766 std::vector<float> outputData(16);
768 std::vector<float> expectedOutput
770 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
// Build a 64-byte-aligned buffer for inputData2 so it qualifies for memory
// import; over-allocate by 2*alignment and align with std::align.
775 size_t totalBytes = numElements *
sizeof(float);
778 const size_t alignment = 64;
779 size_t space = totalBytes + alignment + alignment;
780 auto inputData = std::make_unique<uint8_t[]>(space);
781 void* alignedInputPtr = inputData.get();
782 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
// NOTE(review): "intputPtr" is a typo for "inputPtr" — worth renaming in a
// proper edit of the original file (local variable only; behavior unaffected).
784 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
785 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
// Output tensor bound to binding id 0.
795 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so workload names appear in the dump.
798 runtime->GetProfiler(netId)->EnableProfiling(
true);
801 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
805 std::stringstream ss;
807 std::string dump = ss.str();
// Subtraction must have run on the CL backend, and a copy must have
// occurred at the backend boundary.
810 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
811 CHECK(found != std::string::npos);
814 found = dump.find(
"CopyMemGeneric");
815 CHECK(found != std::string::npos);
// Element-wise comparison (rather than vector ==, as the other tests use).
818 for(
unsigned int i = 0; i < numElements; ++i)
820 CHECK(outputData[i] == expectedOutput[i]);
// Explicit unload — presumably to release the imported buffer before the
// aligned allocation goes out of scope.
822 runtime->UnloadNetwork(netId);
// Test fragment: "NeonImportDisabledFallbackToCl" — garbled extraction, code
// kept byte-identical, comments only. Same CL-fallback scenario as above but
// without import: note LoadNetwork is called WITHOUT networkProperties here.
// Expects the subtraction on CL plus a copy at the boundary.
825 TEST_CASE(
"NeonImportDisabledFallbackToCl")
827 using namespace armnn;
// [original lines 828-889 missing: runtime/network/backend setup]
890 runtime->LoadNetwork(netId, std::move(optNet));
// Fixtures: three 12-element float inputs, one 12-element output.
893 std::vector<float> inputData0
895 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
897 std::vector<float> inputData1
899 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
901 std::vector<float> inputData2
903 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
906 std::vector<float> outputData(12);
908 std::vector<float> expectedOutput
910 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
// Output tensor bound to binding id 0.
921 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so workload names appear in the dump.
924 runtime->GetProfiler(netId)->EnableProfiling(
true);
927 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
931 std::stringstream ss;
933 std::string dump = ss.str();
// Subtraction ran on CL; copy occurred at the backend boundary.
936 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
937 CHECK(found != std::string::npos);
940 found = dump.find(
"CopyMemGeneric");
941 CHECK(found != std::string::npos);
// Numerical result check.
944 CHECK(outputData == expectedOutput);
// Test fragment: "NeonImportEnabledFallbackSubgraphToCl" — garbled
// extraction, code kept byte-identical, comments only. A subgraph falls back
// to CL while pooling stays on Neon: the dump must contain both
// ClSubtractionWorkload_Execute and NeonPooling2dWorkload_Execute, plus
// CopyMemGeneric and SyncMemGeneric for the boundary memory handling.
947 TEST_CASE(
"NeonImportEnabledFallbackSubgraphToCl")
949 using namespace armnn;
// [original lines 950-1027 missing: runtime/network/backend setup]
1028 std::string ignoredErrorMessage;
1032 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Fixtures: three 16-element float inputs; 4-element output — presumably
// pooled down by the Neon pooling layer (layer setup not visible here).
1035 std::vector<float> inputData0
1037 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
1039 std::vector<float> inputData1
1041 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
1043 std::vector<float> inputData2
1045 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
1048 std::vector<float> outputData(4);
1050 std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
// 64-byte-aligned buffer for inputData2 so it qualifies for memory import.
1054 size_t totalBytes = numElements *
sizeof(float);
1055 const size_t alignment = 64;
1056 size_t space = totalBytes + alignment + alignment;
1057 auto inputData = std::make_unique<uint8_t[]>(space);
1058 void* alignedInputPtr = inputData.get();
1059 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
// NOTE(review): "intputPtr" is a typo for "inputPtr" (same typo as the
// earlier test) — worth fixing in a proper edit of the original file.
1061 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
1062 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
// Output tensor bound to binding id 0.
1072 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so workload names appear in the dump.
1075 runtime->GetProfiler(netId)->EnableProfiling(
true);
1078 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1082 std::stringstream ss;
1084 std::string dump = ss.str();
// Subtraction on CL, pooling on Neon, plus copy + sync at the boundary.
1087 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
1088 CHECK(found != std::string::npos);
1091 found = dump.find(
"NeonPooling2dWorkload_Execute");
1092 CHECK(found != std::string::npos);
1095 found = dump.find(
"CopyMemGeneric");
1096 CHECK(found != std::string::npos);
1099 found = dump.find(
"SyncMemGeneric");
1100 CHECK(found != std::string::npos);
// Numerical result check, then explicit unload (presumably to release the
// imported buffer before the aligned allocation is freed).
1103 CHECK(outputData == expectedOutput);
1104 runtime->UnloadNetwork(netId);
// Test fragment: "NeonImportDisableFallbackSubgraphToCl" — garbled
// extraction, code kept byte-identical, comments only. Subgraph-fallback
// variant without import (LoadNetwork called without networkProperties):
// expects CL subtraction, Neon pooling, and a copy at the boundary.
1107 TEST_CASE(
"NeonImportDisableFallbackSubgraphToCl")
1109 using namespace armnn;
// [original lines 1110-1182 missing: runtime/network/backend setup]
1183 runtime->LoadNetwork(netId, std::move(optNet));
// Fixtures: three 12-element float inputs; 2-element pooled output.
1186 std::vector<float> inputData0
1188 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1190 std::vector<float> inputData1
1192 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1194 std::vector<float> inputData2
1196 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1199 std::vector<float> outputData(2);
1201 std::vector<float> expectedOutput{ 11.0f, -1.0f };
// Output tensor bound to binding id 0.
1211 { 0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
// Enable profiling so workload names appear in the dump.
1214 runtime->GetProfiler(netId)->EnableProfiling(
true);
1217 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1221 std::stringstream ss;
1223 std::string dump = ss.str();
// Subtraction on CL, pooling on Neon, copy at the boundary.
1226 std::size_t found = dump.find(
"ClSubtractionWorkload_Execute");
1227 CHECK(found != std::string::npos);
1230 found = dump.find(
"NeonPooling2dWorkload_Execute");
1231 CHECK(found != std::string::npos);
1234 found = dump.find(
"CopyMemGeneric");
1235 CHECK(found != std::string::npos);
// Numerical result check.
1238 CHECK(outputData == expectedOutput);
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
BackendIdSet GetBackendIds() const
std::unordered_set< BackendId > BackendIdSet
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
static ProfilerManager & GetInstance()
bool CheckOrder(const armnn::Graph &graph, const armnn::Layer *first, const armnn::Layer *second)
Checks that first comes before second in the order.
virtual void BackendSelectionHint(Optional< BackendId > backend)=0
Provide a hint for the optimizer as to which backend to prefer for this layer.
uint32_t m_PoolWidth
Pooling width value.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
constexpr const char * MockImportBackendId()
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
uint32_t m_PoolHeight
Pooling height value.
TEST_SUITE("NeonFallback")
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
const BackendId & GetBackendId() const
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
CPU Execution: NEON: ArmCompute.
armnn::IBackendInternalUniquePtr CreateBackendObject(const armnn::BackendId &backendId)
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
A Pooling2dDescriptor for the Pooling2dLayer.
static INetworkPtr Create(NetworkOptions networkOptions={})
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
unsigned int GetNumElements() const