6 #include <arm_compute/runtime/CL/functions/CLActivationLayer.h> 12 #include <doctest/doctest.h> 18 using namespace armnn;
28 unsigned int numElements =
info.GetNumElements();
34 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
37 const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38 arm_compute::CLActivationLayer act_func;
39 act_func.configure(&tensor,
nullptr, act_info);
42 const size_t totalBytes = tensor.info()->total_size();
43 const size_t alignment =
44 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
45 size_t space = totalBytes + alignment + alignment;
46 auto testData = std::make_unique<uint8_t[]>(space);
47 void* alignedPtr = testData.get();
48 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
54 auto* typedPtr =
reinterpret_cast<float*
>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
59 arm_compute::CLScheduler::get().sync();
62 for(
unsigned int i = 0; i < numElements; ++i)
64 CHECK(typedPtr[i] == 0);
79 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
82 const size_t totalBytes = tensor.info()->total_size();
83 const size_t alignment =
84 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
85 size_t space = totalBytes + alignment + alignment;
86 auto testData = std::make_unique<uint8_t[]>(space);
87 void* alignedPtr = testData.get();
88 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
98 static_cast<MemorySourceFlags>(invalidMemSource));
106 std::vector<float> inputData
108 1.0f, 2.0f, 3.0f, 4.0f
128 IConnectableLayer* activation = net->AddActivationLayer(descriptor,
"Activation");
137 size_t totalBytes = numElements *
sizeof(float);
151 std::string ignoredErrorMessage;
154 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
157 const size_t alignment =
158 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
159 size_t space = totalBytes + alignment + alignment;
160 auto inputData = std::make_unique<uint8_t[]>(space);
161 void* alignedInputPtr = inputData.get();
162 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
165 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
166 std::fill_n(intputPtr, numElements, -5.0f);
168 auto outputData = std::make_unique<uint8_t[]>(space);
169 void* alignedOutputPtr = outputData.get();
170 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
171 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
172 std::fill_n(outputPtr, numElements, -10.0f);
174 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
182 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
185 runtime->GetProfiler(netId)->EnableProfiling(
true);
188 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
192 std::stringstream ss;
194 std::string dump = ss.str();
197 std::size_t found = dump.find(
"ActivationWorkload");
198 CHECK(found != std::string::npos);
201 found = dump.find(
"SyncMemGeneric");
202 CHECK(found != std::string::npos);
205 found = dump.find(
"CopyMemGeneric");
206 CHECK(found == std::string::npos);
208 runtime->UnloadNetwork(netId);
212 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
214 for(
unsigned int i = 0; i < numElements; ++i)
216 CHECK(outputResult[i] >= 0);
231 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
234 const size_t totalBytes = tensor.info()->total_size();
235 const size_t alignment =
236 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
237 size_t space = totalBytes + alignment + alignment;
238 auto testData = std::make_unique<uint8_t[]>(space);
239 void* alignedPtr = testData.get();
240 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
247 TEST_CASE(
"ClCanBeImportedAlignedMemory")
257 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
260 const size_t totalBytes = tensor.info()->total_size();
261 const size_t alignment =
262 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
263 size_t space = totalBytes + alignment + alignment;
264 auto testData = std::make_unique<uint8_t[]>(space);
265 void* alignedPtr = testData.get();
266 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
292 std::vector<float> kernel =
299 const std::vector<float> expectedOutput =
306 unsigned int numElements = inputInfo.GetNumElements();
307 size_t totalBytes = numElements *
sizeof(float);
331 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
332 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
336 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
347 std::string ignoredErrorMessage;
350 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
353 const size_t alignment =
354 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
355 size_t space = totalBytes + alignment + alignment;
356 auto inputData = std::make_unique<uint8_t[]>(space);
357 void* alignedInputPtr = inputData.get();
358 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
361 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
376 auto outputData = std::make_unique<uint8_t[]>(space);
377 void* alignedOutputPtr = outputData.get();
378 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
379 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
380 std::fill_n(outputPtr, numElements, -10.0f);
382 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
390 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
393 runtime->GetProfiler(netId)->EnableProfiling(
true);
395 INFO(
"Run ImportInputs");
396 std::vector<ImportedInputId> importedInputIds =
398 std::vector<ImportedOutputId> importedOutputIds =
402 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
406 std::stringstream ss;
408 std::string dump = ss.str();
411 std::size_t found = dump.find(
"Convolution2dWorkload");
412 CHECK(found != std::string::npos);
415 found = dump.find(
"SyncMemGeneric");
416 CHECK(found != std::string::npos);
419 found = dump.find(
"CopyMemGeneric");
420 CHECK(found == std::string::npos);
422 runtime->UnloadNetwork(netId);
426 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
430 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
435 using namespace half_float::literal;
447 std::vector<float> expectedOutput =
449 -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f,
450 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f
454 size_t totalBytesInput = numElements *
sizeof(
Half);
455 size_t totalBytesOutput = numElements *
sizeof(float);
463 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
464 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
479 std::string ignoredErrorMessage;
482 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
485 const size_t alignment =
486 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
487 size_t spaceInput = totalBytesInput + alignment + alignment;
488 size_t spaceOutput = totalBytesOutput + alignment + alignment;
489 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
490 void* alignedInputPtr = inputData.get();
491 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
494 auto* inputPtr =
reinterpret_cast<Half*
>(alignedInputPtr);
495 inputPtr[0] = -37.5_h;
496 inputPtr[1] = -15.2_h;
497 inputPtr[2] = -8.76_h;
498 inputPtr[3] = -2.0_h;
499 inputPtr[4] = -1.5_h;
500 inputPtr[5] = -1.3_h;
501 inputPtr[6] = -0.5_h;
502 inputPtr[7] = -0.4_h;
505 inputPtr[10] = 0.4_h;
506 inputPtr[11] = 0.5_h;
507 inputPtr[12] = 1.3_h;
508 inputPtr[13] = 1.5_h;
509 inputPtr[14] = 2.0_h;
510 inputPtr[15] = 8.76_h;
511 inputPtr[16] = 15.2_h;
512 inputPtr[17] = 37.5_h;
514 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
515 void* alignedOutputPtr = outputData.get();
516 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
517 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
518 std::fill_n(outputPtr, numElements, -10.0f);
520 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
528 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
531 runtime->GetProfiler(netId)->EnableProfiling(
true);
533 INFO(
"Run ImportInputs");
534 std::vector<ImportedInputId> importedInputIds =
536 std::vector<ImportedOutputId> importedOutputIds =
540 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
544 std::stringstream ss;
546 std::string dump = ss.str();
549 std::size_t found = dump.find(
"ConvertFp16ToFp32Workload");
550 CHECK(found != std::string::npos);
553 found = dump.find(
"SyncMemGeneric");
554 CHECK(found != std::string::npos);
557 found = dump.find(
"CopyMemGeneric");
558 CHECK(found == std::string::npos);
560 runtime->UnloadNetwork(netId);
564 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
568 for (
size_t i = 0; i < numElements; ++i)
570 DOCTEST_CHECK_MESSAGE(outputResult[i] == doctest::Approx(expectedOutput[i]).epsilon(0.0004),
571 "outputValue[" << i <<
"]: " << outputResult[i] <<
" != " << expectedOutput[i]);
578 using namespace half_float::literal;
590 std::vector<Half> expectedOutput =
592 -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h,
593 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h
597 size_t totalBytesInput = numElements *
sizeof(float);
598 size_t totalBytesOutput = numElements *
sizeof(
Half);
606 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
607 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
622 std::string ignoredErrorMessage;
625 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
628 const size_t alignment =
629 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
630 size_t spaceInput = totalBytesInput + alignment + alignment;
631 size_t spaceOutput = totalBytesOutput + alignment + alignment;
632 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
633 void* alignedInputPtr = inputData.get();
634 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
637 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
638 inputPtr[0] = -37.5f;
639 inputPtr[1] = -15.2f;
640 inputPtr[2] = -8.76f;
653 inputPtr[15] = 8.76f;
654 inputPtr[16] = 15.2f;
655 inputPtr[17] = 37.5f;
657 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
658 void* alignedOutputPtr = outputData.get();
659 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
660 auto* outputPtr =
reinterpret_cast<Half*
>(alignedOutputPtr);
661 std::fill_n(outputPtr, numElements, -10.0f);
663 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
671 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
674 runtime->GetProfiler(netId)->EnableProfiling(
true);
676 INFO(
"Run ImportInputs");
677 std::vector<ImportedInputId> importedInputIds =
679 std::vector<ImportedOutputId> importedOutputIds =
683 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
687 std::stringstream ss;
689 std::string dump = ss.str();
692 std::size_t found = dump.find(
"ConvertFp32ToFp16Workload");
693 CHECK(found != std::string::npos);
696 found = dump.find(
"SyncMemGeneric");
697 CHECK(found != std::string::npos);
700 found = dump.find(
"CopyMemGeneric");
701 CHECK(found == std::string::npos);
703 runtime->UnloadNetwork(netId);
707 auto* outputResult =
reinterpret_cast<Half*
>(alignedOutputPtr);
711 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
716 using namespace half_float::literal;
728 std::vector<Half> expectedOutput = { 1.0_h };
731 size_t totalBytesInput = numElements *
sizeof(float);
732 size_t totalBytesOutput = numElements *
sizeof(
Half);
740 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
741 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
756 std::string ignoredErrorMessage;
759 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
762 const size_t alignment =
763 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
764 size_t spaceInput = totalBytesInput + alignment + alignment;
765 size_t spaceOutput = totalBytesOutput + alignment + alignment;
766 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
767 void* alignedInputPtr = inputData.get();
768 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
771 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
774 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
775 void* alignedOutputPtr = outputData.get();
776 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
777 auto* outputPtr =
reinterpret_cast<Half*
>(alignedOutputPtr);
778 std::fill_n(outputPtr, numElements, -10.0f);
780 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
788 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
791 runtime->GetProfiler(netId)->EnableProfiling(
true);
793 INFO(
"Run ImportInputs");
794 std::vector<ImportedInputId> importedInputIds =
796 std::vector<ImportedOutputId> importedOutputIds =
800 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
804 std::stringstream ss;
806 std::string dump = ss.str();
809 std::size_t found = dump.find(
"ConvertFp32ToFp16Workload");
810 CHECK(found != std::string::npos);
813 found = dump.find(
"SyncMemGeneric");
814 CHECK(found != std::string::npos);
817 found = dump.find(
"CopyMemGeneric");
818 CHECK(found == std::string::npos);
820 runtime->UnloadNetwork(netId);
824 auto* outputResult =
reinterpret_cast<Half*
>(alignedOutputPtr);
828 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
851 std::vector<float> kernel =
858 const std::vector<float> expectedOutput =
865 unsigned int numElements = inputInfo.GetNumElements();
866 size_t totalBytes = numElements *
sizeof(float);
889 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
890 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
894 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
905 std::string ignoredErrorMessage;
908 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
911 const size_t alignment =
912 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
913 size_t space = totalBytes + alignment + alignment;
914 auto inputData = std::make_unique<uint8_t[]>(space);
915 void* alignedInputPtr = inputData.get();
916 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
919 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
934 auto outputData = std::make_unique<uint8_t[]>(space);
935 void* alignedOutputPtr = outputData.get();
936 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
937 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
938 std::fill_n(outputPtr, numElements, -10.0f);
940 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
948 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
951 runtime->GetProfiler(netId)->EnableProfiling(
true);
953 INFO(
"Run ImportInputs");
954 std::vector<ImportedInputId> importedInputIds =
956 std::vector<ImportedOutputId> importedOutputIds =
960 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
964 std::stringstream ss;
966 std::string dump = ss.str();
969 std::size_t found = dump.find(
"Convolution2dWorkload");
970 CHECK(found != std::string::npos);
973 found = dump.find(
"SyncMemGeneric");
974 CHECK(found != std::string::npos);
977 found = dump.find(
"CopyMemGeneric");
978 CHECK(found == std::string::npos);
981 arm_compute::CLScheduler::get().sync();
984 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
986 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
991 auto inputDataCopy = std::make_unique<uint8_t[]>(space);
992 void* copyInputPtr = inputDataCopy.get();
995 auto* inputCopyPtr =
reinterpret_cast<float*
>(copyInputPtr);
1000 inputCopyPtr[4] = 8;
1001 inputCopyPtr[5] = 7;
1002 inputCopyPtr[6] = 3;
1003 inputCopyPtr[7] = 6;
1004 inputCopyPtr[8] = 3;
1005 inputCopyPtr[9] = 3;
1006 inputCopyPtr[10] = 9;
1007 inputCopyPtr[11] = 1;
1010 auto outputDataCopy = std::make_unique<uint8_t[]>(space);
1011 void* copyOutputPtr = outputDataCopy.get();
1012 auto* outputCopyPtr =
reinterpret_cast<float*
>(copyOutputPtr);
1013 std::fill_n(outputCopyPtr, numElements, -10.0f);
1021 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
1025 runtime->EnqueueWorkload(netId, inputTensorsCopy, outputTensorsCopy);
1027 arm_compute::CLScheduler::get().sync();
1030 outputResult =
reinterpret_cast<float*
>(copyOutputPtr);
1031 CHECK(outputResult);
1032 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1039 found = dump.find(
"Convolution2dWorkload");
1040 CHECK(found != std::string::npos);
1043 found = dump.find(
"SyncMemGeneric");
1044 CHECK(found != std::string::npos);
1047 found = dump.find(
"CopyMemGeneric");
1048 CHECK(found != std::string::npos);
1049 runtime->UnloadNetwork(netId);
1071 std::vector<float> kernel =
1078 const std::vector<float> expectedOutput =
1085 unsigned int numElements = inputInfo.GetNumElements();
1086 size_t totalBytes = numElements *
sizeof(float);
1109 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
1110 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1114 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1125 std::string ignoredErrorMessage;
1128 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1131 const size_t alignment =
1132 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
1133 size_t space = totalBytes + alignment + alignment;
1134 auto inputData = std::make_unique<uint8_t[]>(space);
1135 void* copyInputPtr = inputData.get();
1138 auto* inputPtr =
reinterpret_cast<float*
>(copyInputPtr);
1153 auto outputData = std::make_unique<uint8_t[]>(space);
1154 void* copyOutputPtr = outputData.get();
1155 auto* outputPtr =
reinterpret_cast<float*
>(copyOutputPtr);
1156 std::fill_n(outputPtr, numElements, -10.0f);
1158 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
1166 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
1169 runtime->GetProfiler(netId)->EnableProfiling(
true);
1172 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1176 std::stringstream ss;
1178 std::string dump = ss.str();
1181 std::size_t found = dump.find(
"Convolution2dWorkload");
1182 CHECK(found != std::string::npos);
1185 found = dump.find(
"SyncMemGeneric");
1186 CHECK(found == std::string::npos);
1189 found = dump.find(
"CopyMemGeneric");
1190 CHECK(found != std::string::npos);
1193 arm_compute::CLScheduler::get().sync();
1196 auto* outputResult =
reinterpret_cast<float*
>(copyOutputPtr);
1197 CHECK(outputResult);
1198 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1203 auto inputDataImport = std::make_unique<uint8_t[]>(space);
1204 void* alignedInputImportPtr = inputDataImport.get();
1205 CHECK(std::align(alignment, totalBytes, alignedInputImportPtr, space));
1208 auto* inputImportPtr =
reinterpret_cast<float*
>(alignedInputImportPtr);
1209 inputImportPtr[0] = 1;
1210 inputImportPtr[1] = 5;
1211 inputImportPtr[2] = 2;
1212 inputImportPtr[3] = 3;
1213 inputImportPtr[4] = 8;
1214 inputImportPtr[5] = 7;
1215 inputImportPtr[6] = 3;
1216 inputImportPtr[7] = 6;
1217 inputImportPtr[8] = 3;
1218 inputImportPtr[9] = 3;
1219 inputImportPtr[10] = 9;
1220 inputImportPtr[11] = 1;
1223 auto outputDataImport = std::make_unique<uint8_t[]>(space);
1224 void* alignedOutputImportPtr = outputDataImport.get();
1225 CHECK(std::align(alignment, totalBytes, alignedOutputImportPtr, space));
1226 auto* outputImportPtr =
reinterpret_cast<float*
>(alignedOutputImportPtr);
1227 std::fill_n(outputImportPtr, numElements, -10.0f);
1235 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputImportPtr)}
1238 INFO(
"Run ImportInputs");
1239 std::vector<ImportedInputId> importedInputIds =
1241 std::vector<ImportedOutputId> importedOutputIds =
1245 runtime->EnqueueWorkload(netId, inputTensorsImport, outputTensorsImport, importedInputIds, importedOutputIds);
1247 arm_compute::CLScheduler::get().sync();
1250 outputResult =
reinterpret_cast<float*
>(alignedOutputImportPtr);
1251 CHECK(outputResult);
1252 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1260 found = dump.find(
"Convolution2dWorkload");
1261 CHECK(found != std::string::npos);
1264 found = dump.find(
"SyncMemGeneric");
1265 CHECK(found != std::string::npos);
1268 found = dump.find(
"CopyMemGeneric");
1269 CHECK(found != std::string::npos);
1270 runtime->UnloadNetwork(netId);
TEST_SUITE("TestConstTensorLayerVisitor")
uint32_t m_PadBottom
Padding bottom value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
static ProfilerManager & GetInstance()
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
A Convolution2dDescriptor for the Convolution2dLayer.
IConnectableLayer * AddConvertFp32ToFp16Layer(const char *name=nullptr)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
uint32_t m_PadRight
Padding right value in the width dimension.
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const
Analyzes the tracked events and writes the results to the given output stream.
Copyright (c) 2021 ARM Limited and Contributors.
Private implementation of INetwork.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
IConnectableLayer * AddInputLayer(LayerBindingId id, const char *name=nullptr)
#define ARMNN_NO_DEPRECATE_WARN_END
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
IConnectableLayer * AddConvertFp16ToFp32Layer(const char *name=nullptr)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
IConnectableLayer * AddOutputLayer(LayerBindingId id, const char *name=nullptr)
#define ARMNN_ASSERT(COND)
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
An ActivationDescriptor for the ActivationLayer.
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
EmptyOptional is used to initialize the Optional class in case we want to have default value for an Optional in a function declaration.
MemorySource
Define the Memory Source to reduce copies.
const Graph & GetGraph() const
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const override
static INetworkPtr Create(NetworkOptions networkOptions={})
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
uint32_t m_PadLeft
Padding left value in the width dimension.
unsigned int GetNumElements() const