6 #include <arm_compute/runtime/CL/functions/CLActivationLayer.h> 12 #include <doctest/doctest.h> 18 using namespace armnn;
28 unsigned int numElements =
info.GetNumElements();
34 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
37 const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38 arm_compute::CLActivationLayer act_func;
39 act_func.configure(&tensor,
nullptr, act_info);
42 const size_t totalBytes = tensor.info()->total_size();
43 const size_t alignment =
44 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
45 size_t space = totalBytes + alignment + alignment;
46 auto testData = std::make_unique<uint8_t[]>(space);
47 void* alignedPtr = testData.get();
48 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
54 auto* typedPtr =
reinterpret_cast<float*
>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
59 arm_compute::CLScheduler::get().sync();
62 for(
unsigned int i = 0; i < numElements; ++i)
64 CHECK(typedPtr[i] == 0);
79 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
82 const size_t totalBytes = tensor.info()->total_size();
83 const size_t alignment =
84 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
85 size_t space = totalBytes + alignment + alignment;
86 auto testData = std::make_unique<uint8_t[]>(space);
87 void* alignedPtr = testData.get();
88 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
98 static_cast<MemorySourceFlags>(invalidMemSource));
106 std::vector<float> inputData
108 1.0f, 2.0f, 3.0f, 4.0f
128 IConnectableLayer* activation = net->AddActivationLayer(descriptor,
"Activation");
137 size_t totalBytes = numElements *
sizeof(float);
152 std::string ignoredErrorMessage;
155 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
158 const size_t alignment =
159 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
160 size_t space = totalBytes + alignment + alignment;
161 auto inputData = std::make_unique<uint8_t[]>(space);
162 void* alignedInputPtr = inputData.get();
163 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
166 auto* intputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
167 std::fill_n(intputPtr, numElements, -5.0f);
169 auto outputData = std::make_unique<uint8_t[]>(space);
170 void* alignedOutputPtr = outputData.get();
171 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
172 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
173 std::fill_n(outputPtr, numElements, -10.0f);
175 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
183 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
186 runtime->GetProfiler(netId)->EnableProfiling(
true);
189 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
193 std::stringstream ss;
195 std::string dump = ss.str();
198 std::size_t found = dump.find(
"ActivationWorkload");
199 CHECK(found != std::string::npos);
202 found = dump.find(
"SyncMemGeneric");
203 CHECK(found != std::string::npos);
206 found = dump.find(
"CopyMemGeneric");
207 CHECK(found == std::string::npos);
209 runtime->UnloadNetwork(netId);
213 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
215 for(
unsigned int i = 0; i < numElements; ++i)
217 CHECK(outputResult[i] >= 0);
232 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
235 const size_t totalBytes = tensor.info()->total_size();
236 const size_t alignment =
237 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
238 size_t space = totalBytes + alignment + alignment;
239 auto testData = std::make_unique<uint8_t[]>(space);
240 void* alignedPtr = testData.get();
241 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
248 TEST_CASE(
"ClCanBeImportedAlignedMemory")
258 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
261 const size_t totalBytes = tensor.info()->total_size();
262 const size_t alignment =
263 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
264 size_t space = totalBytes + alignment + alignment;
265 auto testData = std::make_unique<uint8_t[]>(space);
266 void* alignedPtr = testData.get();
267 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
293 std::vector<float> kernel =
300 const std::vector<float> expectedOutput =
307 unsigned int numElements = inputInfo.GetNumElements();
308 size_t totalBytes = numElements *
sizeof(float);
332 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
333 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
349 std::string ignoredErrorMessage;
352 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
355 const size_t alignment =
356 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
357 size_t space = totalBytes + alignment + alignment;
358 auto inputData = std::make_unique<uint8_t[]>(space);
359 void* alignedInputPtr = inputData.get();
360 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
363 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
378 auto outputData = std::make_unique<uint8_t[]>(space);
379 void* alignedOutputPtr = outputData.get();
380 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
381 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
382 std::fill_n(outputPtr, numElements, -10.0f);
384 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
392 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
395 runtime->GetProfiler(netId)->EnableProfiling(
true);
397 INFO(
"Run ImportInputs");
398 std::vector<ImportedInputId> importedInputIds =
401 CHECK(importedInputIds.size() == 1);
402 std::vector<ImportedOutputId> importedOutputIds =
405 CHECK(importedOutputIds.size() == 1);
411 std::stringstream ss;
413 std::string dump = ss.str();
416 std::size_t found = dump.find(
"Convolution2dWorkload");
417 CHECK(found != std::string::npos);
420 found = dump.find(
"SyncMemGeneric");
421 CHECK(found != std::string::npos);
424 found = dump.find(
"CopyMemGeneric");
425 CHECK(found == std::string::npos);
427 runtime->UnloadNetwork(netId);
431 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
435 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
440 using namespace half_float::literal;
452 std::vector<float> expectedOutput =
454 -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f,
455 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f
459 size_t totalBytesInput = numElements *
sizeof(
Half);
460 size_t totalBytesOutput = numElements *
sizeof(float);
468 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
469 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
485 std::string ignoredErrorMessage;
488 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
491 const size_t alignment =
492 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
493 size_t spaceInput = totalBytesInput + alignment + alignment;
494 size_t spaceOutput = totalBytesOutput + alignment + alignment;
495 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
496 void* alignedInputPtr = inputData.get();
497 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
500 auto* inputPtr =
reinterpret_cast<Half*
>(alignedInputPtr);
501 inputPtr[0] = -37.5_h;
502 inputPtr[1] = -15.2_h;
503 inputPtr[2] = -8.76_h;
504 inputPtr[3] = -2.0_h;
505 inputPtr[4] = -1.5_h;
506 inputPtr[5] = -1.3_h;
507 inputPtr[6] = -0.5_h;
508 inputPtr[7] = -0.4_h;
511 inputPtr[10] = 0.4_h;
512 inputPtr[11] = 0.5_h;
513 inputPtr[12] = 1.3_h;
514 inputPtr[13] = 1.5_h;
515 inputPtr[14] = 2.0_h;
516 inputPtr[15] = 8.76_h;
517 inputPtr[16] = 15.2_h;
518 inputPtr[17] = 37.5_h;
520 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
521 void* alignedOutputPtr = outputData.get();
522 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
523 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
524 std::fill_n(outputPtr, numElements, -10.0f);
526 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
534 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
537 runtime->GetProfiler(netId)->EnableProfiling(
true);
539 INFO(
"Run ImportInputs");
540 std::vector<ImportedInputId> importedInputIds =
543 CHECK(importedInputIds.size() == 1);
544 std::vector<ImportedOutputId> importedOutputIds =
547 CHECK(importedOutputIds.size() == 1);
554 std::stringstream ss;
556 std::string dump = ss.str();
559 std::size_t found = dump.find(
"ConvertFp16ToFp32Workload");
560 CHECK(found != std::string::npos);
563 found = dump.find(
"SyncMemGeneric");
564 CHECK(found != std::string::npos);
567 found = dump.find(
"CopyMemGeneric");
568 CHECK(found == std::string::npos);
570 runtime->UnloadNetwork(netId);
574 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
578 for (
size_t i = 0; i < numElements; ++i)
580 DOCTEST_CHECK_MESSAGE(outputResult[i] == doctest::Approx(expectedOutput[i]).epsilon(0.0004),
581 "outputValue[" << i <<
"]: " << outputResult[i] <<
" != " << expectedOutput[i]);
588 using namespace half_float::literal;
600 std::vector<Half> expectedOutput =
602 -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h,
603 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h
607 size_t totalBytesInput = numElements *
sizeof(float);
608 size_t totalBytesOutput = numElements *
sizeof(
Half);
616 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
617 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
633 std::string ignoredErrorMessage;
636 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
639 const size_t alignment =
640 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
641 size_t spaceInput = totalBytesInput + alignment + alignment;
642 size_t spaceOutput = totalBytesOutput + alignment + alignment;
643 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
644 void* alignedInputPtr = inputData.get();
645 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
648 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
649 inputPtr[0] = -37.5f;
650 inputPtr[1] = -15.2f;
651 inputPtr[2] = -8.76f;
664 inputPtr[15] = 8.76f;
665 inputPtr[16] = 15.2f;
666 inputPtr[17] = 37.5f;
668 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
669 void* alignedOutputPtr = outputData.get();
670 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
671 auto* outputPtr =
reinterpret_cast<Half*
>(alignedOutputPtr);
672 std::fill_n(outputPtr, numElements, -10.0f);
674 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
682 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
685 runtime->GetProfiler(netId)->EnableProfiling(
true);
687 INFO(
"Run ImportInputs");
688 std::vector<ImportedInputId> importedInputIds =
691 CHECK(importedInputIds.size() == 1);
692 std::vector<ImportedOutputId> importedOutputIds =
695 CHECK(importedOutputIds.size() == 1);
702 std::stringstream ss;
704 std::string dump = ss.str();
707 std::size_t found = dump.find(
"ConvertFp32ToFp16Workload");
708 CHECK(found != std::string::npos);
711 found = dump.find(
"SyncMemGeneric");
712 CHECK(found != std::string::npos);
715 found = dump.find(
"CopyMemGeneric");
716 CHECK(found == std::string::npos);
718 runtime->UnloadNetwork(netId);
722 auto* outputResult =
reinterpret_cast<Half*
>(alignedOutputPtr);
726 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
731 using namespace half_float::literal;
743 std::vector<Half> expectedOutput = { 1.0_h };
746 size_t totalBytesInput = numElements *
sizeof(float);
747 size_t totalBytesOutput = numElements *
sizeof(
Half);
755 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
756 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
772 std::string ignoredErrorMessage;
775 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
778 const size_t alignment =
779 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
780 size_t spaceInput = totalBytesInput + alignment + alignment;
781 size_t spaceOutput = totalBytesOutput + alignment + alignment;
782 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
783 void* alignedInputPtr = inputData.get();
784 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
787 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
790 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
791 void* alignedOutputPtr = outputData.get();
792 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
793 auto* outputPtr =
reinterpret_cast<Half*
>(alignedOutputPtr);
794 std::fill_n(outputPtr, numElements, -10.0f);
796 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
804 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
807 runtime->GetProfiler(netId)->EnableProfiling(
true);
809 INFO(
"Run ImportInputs");
810 std::vector<ImportedInputId> importedInputIds =
812 CHECK(importedInputIds.size() == 1);
813 std::vector<ImportedOutputId> importedOutputIds =
815 CHECK(importedOutputIds.size() == 1);
822 std::stringstream ss;
824 std::string dump = ss.str();
827 std::size_t found = dump.find(
"ConvertFp32ToFp16Workload");
828 CHECK(found != std::string::npos);
831 found = dump.find(
"SyncMemGeneric");
832 CHECK(found != std::string::npos);
835 found = dump.find(
"CopyMemGeneric");
836 CHECK(found == std::string::npos);
838 runtime->UnloadNetwork(netId);
842 auto* outputResult =
reinterpret_cast<Half*
>(alignedOutputPtr);
846 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
869 std::vector<float> kernel =
876 const std::vector<float> expectedOutput =
883 unsigned int numElements = inputInfo.GetNumElements();
884 size_t totalBytes = numElements *
sizeof(float);
907 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
908 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
924 std::string ignoredErrorMessage;
927 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
930 const size_t alignment =
931 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
932 size_t space = totalBytes + alignment + alignment;
933 auto inputData = std::make_unique<uint8_t[]>(space);
934 void* alignedInputPtr = inputData.get();
935 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
938 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
953 auto outputData = std::make_unique<uint8_t[]>(space);
954 void* alignedOutputPtr = outputData.get();
955 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
956 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
957 std::fill_n(outputPtr, numElements, -10.0f);
959 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
967 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
970 runtime->GetProfiler(netId)->EnableProfiling(
true);
972 INFO(
"Run ImportInputs");
973 std::vector<ImportedInputId> importedInputIds =
976 CHECK(importedInputIds.size() == 1);
977 std::vector<ImportedOutputId> importedOutputIds =
980 CHECK(importedOutputIds.size() == 1);
987 std::stringstream ss;
989 std::string dump = ss.str();
992 std::size_t found = dump.find(
"Convolution2dWorkload");
993 CHECK(found != std::string::npos);
996 found = dump.find(
"SyncMemGeneric");
997 CHECK(found != std::string::npos);
1000 found = dump.find(
"CopyMemGeneric");
1001 CHECK(found == std::string::npos);
1004 arm_compute::CLScheduler::get().sync();
1007 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
1008 CHECK(outputResult);
1009 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1014 auto inputDataCopy = std::make_unique<uint8_t[]>(space);
1015 void* copyInputPtr = inputDataCopy.get();
1018 auto* inputCopyPtr =
reinterpret_cast<float*
>(copyInputPtr);
1019 inputCopyPtr[0] = 1;
1020 inputCopyPtr[1] = 5;
1021 inputCopyPtr[2] = 2;
1022 inputCopyPtr[3] = 3;
1023 inputCopyPtr[4] = 8;
1024 inputCopyPtr[5] = 7;
1025 inputCopyPtr[6] = 3;
1026 inputCopyPtr[7] = 6;
1027 inputCopyPtr[8] = 3;
1028 inputCopyPtr[9] = 3;
1029 inputCopyPtr[10] = 9;
1030 inputCopyPtr[11] = 1;
1033 auto outputDataCopy = std::make_unique<uint8_t[]>(space);
1034 void* copyOutputPtr = outputDataCopy.get();
1035 auto* outputCopyPtr =
reinterpret_cast<float*
>(copyOutputPtr);
1036 std::fill_n(outputCopyPtr, numElements, -10.0f);
1044 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
1048 runtime->EnqueueWorkload(netId, inputTensorsCopy, outputTensorsCopy);
1050 arm_compute::CLScheduler::get().sync();
1053 outputResult =
reinterpret_cast<float*
>(copyOutputPtr);
1054 CHECK(outputResult);
1055 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1062 found = dump.find(
"Convolution2dWorkload");
1063 CHECK(found != std::string::npos);
1066 found = dump.find(
"SyncMemGeneric");
1067 CHECK(found != std::string::npos);
1070 found = dump.find(
"CopyMemGeneric");
1071 CHECK(found != std::string::npos);
1072 runtime->UnloadNetwork(netId);
1094 std::vector<float> kernel =
1101 const std::vector<float> expectedOutput =
1108 unsigned int numElements = inputInfo.GetNumElements();
1109 size_t totalBytes = numElements *
sizeof(float);
1133 inputLayer->GetOutputSlot(0).Connect(convLayer->
GetInputSlot(0));
1134 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1150 std::string ignoredErrorMessage;
1153 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1156 const size_t alignment =
1157 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
1158 size_t space = totalBytes + alignment + alignment;
1159 auto inputData = std::make_unique<uint8_t[]>(space);
1160 void* copyInputPtr = inputData.get();
1163 auto* inputPtr =
reinterpret_cast<float*
>(copyInputPtr);
1178 auto outputData = std::make_unique<uint8_t[]>(space);
1179 void* copyOutputPtr = outputData.get();
1180 auto* outputPtr =
reinterpret_cast<float*
>(copyOutputPtr);
1181 std::fill_n(outputPtr, numElements, -10.0f);
1183 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
1191 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
1194 runtime->GetProfiler(netId)->EnableProfiling(
true);
1197 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1201 std::stringstream ss;
1203 std::string dump = ss.str();
1206 std::size_t found = dump.find(
"Convolution2dWorkload");
1207 CHECK(found != std::string::npos);
1210 found = dump.find(
"SyncMemGeneric");
1211 CHECK(found == std::string::npos);
1214 found = dump.find(
"CopyMemGeneric");
1215 CHECK(found != std::string::npos);
1218 arm_compute::CLScheduler::get().sync();
1221 auto* outputResult =
reinterpret_cast<float*
>(copyOutputPtr);
1222 CHECK(outputResult);
1223 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1228 auto inputDataImport = std::make_unique<uint8_t[]>(space);
1229 void* alignedInputImportPtr = inputDataImport.get();
1230 CHECK(std::align(alignment, totalBytes, alignedInputImportPtr, space));
1233 auto* inputImportPtr =
reinterpret_cast<float*
>(alignedInputImportPtr);
1234 inputImportPtr[0] = 1;
1235 inputImportPtr[1] = 5;
1236 inputImportPtr[2] = 2;
1237 inputImportPtr[3] = 3;
1238 inputImportPtr[4] = 8;
1239 inputImportPtr[5] = 7;
1240 inputImportPtr[6] = 3;
1241 inputImportPtr[7] = 6;
1242 inputImportPtr[8] = 3;
1243 inputImportPtr[9] = 3;
1244 inputImportPtr[10] = 9;
1245 inputImportPtr[11] = 1;
1248 auto outputDataImport = std::make_unique<uint8_t[]>(space);
1249 void* alignedOutputImportPtr = outputDataImport.get();
1250 CHECK(std::align(alignment, totalBytes, alignedOutputImportPtr, space));
1251 auto* outputImportPtr =
reinterpret_cast<float*
>(alignedOutputImportPtr);
1252 std::fill_n(outputImportPtr, numElements, -10.0f);
1260 {0,
armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputImportPtr)}
1263 INFO(
"Run ImportInputs");
1264 std::vector<ImportedInputId> importedInputIds =
1266 CHECK(importedInputIds.size() == 1);
1267 std::vector<ImportedOutputId> importedOutputIds =
1269 CHECK(importedOutputIds.size() == 1);
1274 arm_compute::CLScheduler::get().sync();
1277 outputResult =
reinterpret_cast<float*
>(alignedOutputImportPtr);
1278 CHECK(outputResult);
1279 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1287 found = dump.find(
"Convolution2dWorkload");
1288 CHECK(found != std::string::npos);
1291 found = dump.find(
"SyncMemGeneric");
1292 CHECK(found != std::string::npos);
1295 found = dump.find(
"CopyMemGeneric");
1296 CHECK(found != std::string::npos);
1297 runtime->UnloadNetwork(netId);
TEST_SUITE("TestConstTensorLayerVisitor")
uint32_t m_PadBottom
Padding bottom value in the height dimension.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
static ProfilerManager & GetInstance()
A Convolution2dDescriptor for the Convolution2dLayer.
IConnectableLayer * AddConvertFp32ToFp16Layer(const char *name=nullptr)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
uint32_t m_PadRight
Padding right value in the width dimension.
void AnalyzeEventsAndWriteResults(std::ostream &outStream) const
Analyzes the tracked events and writes the results to the given output stream.
Copyright (c) 2021 ARM Limited and Contributors.
Private implementation of INetwork.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
uint32_t m_PadTop
Padding top value in the height dimension.
TEST_CASE_FIXTURE(ClContextControlFixture, "CopyBetweenNeonAndGpu")
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
IConnectableLayer * AddInputLayer(LayerBindingId id, const char *name=nullptr)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
IConnectableLayer * AddConvertFp16ToFp32Layer(const char *name=nullptr)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
IConnectableLayer * AddOutputLayer(LayerBindingId id, const char *name=nullptr)
#define ARMNN_ASSERT(COND)
GPU Execution: OpenCL: ArmCompute.
ArmNN performs an optimization on each model/network before it gets loaded for execution.
An ActivationDescriptor for the ActivationLayer.
const TensorInfo & GetInfo() const
This factory creates ClImportTensorHandles that refer to imported memory tensors. ...
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
MemorySource
Define the Memory Source to reduce copies.
const Graph & GetGraph() const
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const override
static INetworkPtr Create(NetworkOptions networkOptions={})
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
uint32_t m_PadLeft
Padding left value in the width dimension.
unsigned int GetNumElements() const