11 #include <GraphUtils.hpp> 12 #include <TestUtils.hpp> 14 #include <doctest/doctest.h> 19 using namespace armnn;
25 std::vector<T> GetVector(
unsigned int size,
float initial,
float increment)
27 std::vector<float> typeVector(size, initial);
28 std::vector<T> vector(size);
32 for (
unsigned int i = 0; i < size; ++i)
34 vector[i] = T(initial + (increment * static_cast<float>(i)));
40 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
41 struct Convolution2dTest
44 static const bool isElementWise =
false;
45 static const bool isConstTensorAsInputSupported =
false;
51 constexpr
static const unsigned int inputSize = 48;
52 constexpr
static const unsigned int outputSize = 36;
64 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
65 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
66 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
67 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
68 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
69 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
76 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
87 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
88 struct DWConvolution2dTest
92 static const bool isElementWise =
false;
93 static const bool isConstTensorAsInputSupported =
false;
99 constexpr
static const unsigned int inputSize = 48;
100 constexpr
static const unsigned int outputSize = 108;
113 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
114 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
115 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
116 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
117 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
118 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
125 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
136 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
141 static const bool isElementWise =
false;
142 static const bool isConstTensorAsInputSupported =
true;
148 constexpr
static const unsigned int inputSize = 10;
149 constexpr
static const unsigned int outputSize = 6;
165 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
169 std::vector<float> weightsData = { 1, 2, 3, 4, 5,
172 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
173 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset,
true);
179 std::vector<IConnectableLayer*> layers = { weightsLayer };
184 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
189 static const bool isElementWise =
false;
190 static const bool isConstTensorAsInputSupported =
false;
195 constexpr
static const unsigned int inputSize = 48;
196 constexpr
static const unsigned int outputSize = 48;
209 std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
210 std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
211 std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
212 std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
214 const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
223 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
234 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
238 static const bool isElementWise =
true;
239 static const bool isConstTensorAsInputSupported =
false;
244 constexpr
static const unsigned int inputSize = 48;
245 constexpr
static const unsigned int outputSize = 48;
258 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
269 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
273 static const bool isElementWise =
true;
274 static const bool isConstTensorAsInputSupported =
false;
279 constexpr
static const unsigned int inputSize = 48;
280 constexpr
static const unsigned int outputSize = 48;
293 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
304 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
308 static const bool isElementWise =
true;
309 static const bool isConstTensorAsInputSupported =
false;
314 constexpr
static const unsigned int inputSize = 48;
315 constexpr
static const unsigned int outputSize = 48;
328 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
339 template<DataType ArmnnType,
typename T = ResolveType<ArmnnType>>
343 static const bool isElementWise =
true;
344 static const bool isConstTensorAsInputSupported =
false;
349 constexpr
static const unsigned int inputSize = 48;
350 constexpr
static const unsigned int outputSize = 48;
363 static std::vector<IConnectableLayer*> AddConstantLayers(
INetwork* network,
374 template<
typename LayerTest,
377 float scale, int32_t offset)
389 IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
393 IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):
nullptr;
396 if(LayerTest::isConstTensorAsInputSupported)
398 std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
403 for (
unsigned int i = 0; i < constantLayers.size(); ++i)
410 TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
411 TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
416 activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
421 activationLayer->GetOutputSlot(0).Connect(outputLayer->
GetInputSlot(0));
423 if (LayerTest::isElementWise)
435 template<
typename LayerTest,
440 float scale = 1.f, int32_t offset=0)
444 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
false, scale, offset);
454 auto checkFusedConv2d = [](
const Layer*
const layer)->
bool {
455 return IsLayerOfType<LayerType>(layer) &&
456 (layer->GetNameStr() ==
"fused-activation-into-receiverLayer");
460 if(LayerTest::isConstTensorAsInputSupported)
462 CHECK(4 == graphFused.GetNumLayers());
465 &IsLayerOfType<InputLayer>,
466 &IsLayerOfType<ConstantLayer>,
468 &IsLayerOfType<OutputLayer>));
472 CHECK(fusedReceiverLayer);
477 CHECK(3 == graphFused.GetNumLayers());
480 &IsLayerOfType<InputLayer>,
482 &IsLayerOfType<OutputLayer>));
487 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) ==
Status::Success);
490 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
491 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
492 std::vector<T> outputDataFused(LayerTest::outputSize);
494 armnn::TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
498 {0,
ConstTensor(inputTensorInfo, inputDataFused.data())}};
500 {0,
Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
503 CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) ==
Status::Success);
507 INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
true, scale, offset);
518 if(LayerTest::isConstTensorAsInputSupported)
520 CHECK(6 == graphNotFused.GetNumLayers());
522 graphNotFused.cend(),
523 &IsLayerOfType<InputLayer>,
524 &IsLayerOfType<ConstantLayer>,
525 &IsLayerOfType<LayerType>,
526 &IsLayerOfType<ActivationLayer>,
527 &IsLayerOfType<OutputLayer>,
528 &IsLayerOfType<OutputLayer>));
532 CHECK(5 == graphNotFused.GetNumLayers());
534 graphNotFused.cend(),
535 &IsLayerOfType<InputLayer>,
536 &IsLayerOfType<LayerType>,
537 &IsLayerOfType<ActivationLayer>,
538 &IsLayerOfType<OutputLayer>,
539 &IsLayerOfType<OutputLayer>));
544 CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) ==
Status::Success);
547 std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
548 std::vector<T> outputDataNotFused(LayerTest::outputSize);
549 std::vector<T> outputData2NotFused(LayerTest::outputSize);
551 TensorInfo inputTensorInfoNotFused = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0);
555 {0,
ConstTensor(inputTensorInfoNotFused, inputDataNotFused.data())}};
557 {0,
Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
558 {1,
Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
561 CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
565 for (
unsigned int n = 0; n < outputDataFused.size(); ++n)
567 auto outputNotFused =
static_cast<float>(outputDataNotFused[n]);
568 CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
572 template<
typename LayerTest,
577 float scale = 1.f, int32_t offset = 0)
583 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor,
false, scale, offset);
593 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) ==
Status::Success);
596 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
597 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
598 std::vector<T> outputDataFused(LayerTest::outputSize);
600 TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
604 {0,
ConstTensor(inputTensorInfo, inputDataFused.data())}};
606 {0,
Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
609 run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
613 catch (
const std::exception& e)
615 std::cerr << e.what() << std::endl;
624 #if defined(ARMCOMPUTENEON_ENABLED) 628 TEST_CASE(
"FuseReLUIntoConvFloat32CpuAccTest")
633 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
636 TEST_CASE(
"FuseReLUIntoDWConvFloat32CpuAccTest")
641 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
644 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat32CpuAccTest")
649 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
652 TEST_CASE(
"FuseReLUIntoBatchNormFloat32CpuAccTest")
657 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
662 TEST_CASE(
"FuseBoundedReLUIntoConvFloat32CpuAccTest")
666 activationDescriptor.
m_A = 1.0f;
667 activationDescriptor.
m_B = -1.0f;
669 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
672 TEST_CASE(
"FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
676 activationDescriptor.
m_A = 1.0f;
677 activationDescriptor.
m_B = -1.0f;
679 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::Float32 > ,
DataType::Float32 >
682 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
686 activationDescriptor.
m_A = 1.0f;
687 activationDescriptor.
m_B = -1.0f;
689 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
692 TEST_CASE(
"FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
696 activationDescriptor.
m_A = 1.0f;
697 activationDescriptor.
m_B = -1.0f;
699 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
704 TEST_CASE(
"FuseReLUIntoConvQAsymmU8CpuAccTest")
709 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
712 TEST_CASE(
"FuseReLUIntoDWConvQAsymmU8CpuAccTest")
717 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
720 TEST_CASE(
"FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
725 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
730 TEST_CASE(
"FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
734 activationDescriptor.
m_A = 6.0f;
735 activationDescriptor.
m_B = 0.0f;
737 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
740 TEST_CASE(
"FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
744 activationDescriptor.
m_A = 6.0f;
745 activationDescriptor.
m_B = 0.0f;
747 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > ,
DataType::QAsymmS8 >
750 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
754 activationDescriptor.
m_A = 6.0f;
755 activationDescriptor.
m_B = 0.0f;
757 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
762 TEST_CASE(
"FuseTanHIntoConvFloat32CpuAccTest")
767 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
772 TEST_CASE(
"FuseHardSwishIntoConvFloat32CpuAccTest")
777 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
782 TEST_CASE(
"LayerFollowedByActivationFloat32CpuAccTest")
785 for (
int i = 0; i != 12; ++i)
788 activationDescriptor.
m_A = 1.0f;
789 activationDescriptor.
m_B = -1.0f;
790 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
791 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " << i);
792 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
793 (activationDescriptor,
Compute::CpuAcc)),
"DepthwiseConvolution + Activation function " << i);
794 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
795 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " << i);
796 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
797 (activationDescriptor,
Compute::CpuAcc)),
"BatchNorm + Activation function " << i);
800 TEST_CASE(
"LayerFollowedByActivationFloat16CpuAccTest")
803 for (
int i = 0; i != 12; ++i)
806 activationDescriptor.
m_A = 1.0f;
807 activationDescriptor.
m_B = -1.0f;
808 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
809 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " << i);
810 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
811 (activationDescriptor,
Compute::CpuAcc)),
"DepthwiseConvolution + Activation function " << i);
812 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
813 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " << i);
814 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
815 (activationDescriptor,
Compute::CpuAcc)),
"BatchNorm + Activation function " << i);
818 TEST_CASE(
"LayerFollowedByActivationQAsymmU8CpuAccTest")
823 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
824 (activationDescriptor,
Compute::CpuAcc, 1.f / 256.f, 0)),
"Convolution + Activation function " <<
825 static_cast<int>(activationDescriptor.
m_Function));
826 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
827 (activationDescriptor,
Compute::CpuAcc, 1.f / 256.f, 0)),
"FullyConnected + Activation function " <<
828 static_cast<int>(activationDescriptor.
m_Function));
831 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
832 (activationDescriptor,
Compute::CpuAcc, 1.f / 128.f, 128)),
"Convolution + Activation function " <<
833 static_cast<int>(activationDescriptor.
m_Function));
834 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
835 (activationDescriptor,
Compute::CpuAcc, 1.f / 128.f, 128)),
"FullyConnected + Activation function " <<
836 static_cast<int>(activationDescriptor.
m_Function));
839 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
840 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
841 static_cast<int>(activationDescriptor.
m_Function));
842 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
843 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
844 static_cast<int>(activationDescriptor.
m_Function));
847 activationDescriptor.
m_A = 1.0f;
848 activationDescriptor.
m_B = -1.0f;
849 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
850 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
851 static_cast<int>(activationDescriptor.
m_Function));
852 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
853 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
854 static_cast<int>(activationDescriptor.
m_Function));
857 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
858 (activationDescriptor,
Compute::CpuAcc)),
"Convolution + Activation function " <<
859 static_cast<int>(activationDescriptor.
m_Function));
860 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
861 (activationDescriptor,
Compute::CpuAcc)),
"FullyConnected + Activation function " <<
862 static_cast<int>(activationDescriptor.
m_Function));
867 #if defined(ARMCOMPUTECL_ENABLED) 871 TEST_CASE(
"FuseReLUIntoConvFloat32GpuAccTest")
876 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
879 TEST_CASE(
"FuseReLUIntoDWConvFloat32GpuAccTest")
884 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
887 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat32GpuAccTest")
892 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
895 TEST_CASE(
"FuseReLUIntoBatchNormFloat32GpuAccTest")
900 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
903 TEST_CASE(
"FuseReLUIntoMulFloat32GpuAccTest")
908 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
911 TEST_CASE(
"FuseReLUIntoAddFloat32GpuAccTest")
916 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
919 TEST_CASE(
"FuseReLUIntoSubFloat32GpuAccTest")
924 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
927 TEST_CASE(
"FuseReLUIntoDivFloat32GpuAccTest")
932 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
937 TEST_CASE(
"FuseBoundedReLUIntoConvFloat32GpuAccTest")
941 activationDescriptor.
m_A = 1.0f;
942 activationDescriptor.
m_B = -1.0f;
944 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
947 TEST_CASE(
"FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
951 activationDescriptor.
m_A = 1.0f;
952 activationDescriptor.
m_B = -1.0f;
954 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
957 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
961 activationDescriptor.
m_A = 1.0f;
962 activationDescriptor.
m_B = -1.0f;
964 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
967 TEST_CASE(
"FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
971 activationDescriptor.
m_A = 1.0f;
972 activationDescriptor.
m_B = -1.0f;
974 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
977 TEST_CASE(
"FuseBoundedReLUIntoMulFloat32GpuAccTest")
981 activationDescriptor.
m_A = 1.0f;
982 activationDescriptor.
m_B = -1.0f;
984 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
987 TEST_CASE(
"FuseBoundedReLUIntoAddFloat32GpuAccTest")
991 activationDescriptor.
m_A = 1.0f;
992 activationDescriptor.
m_B = -1.0f;
994 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
997 TEST_CASE(
"FuseBoundedReLUIntoSubFloat32GpuAccTest")
1001 activationDescriptor.
m_A = 1.0f;
1002 activationDescriptor.
m_B = -1.0f;
1004 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1007 TEST_CASE(
"FuseBoundedReLUIntoDivFloat32GpuAccTest")
1011 activationDescriptor.
m_A = 1.0f;
1012 activationDescriptor.
m_B = -1.0f;
1014 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1019 TEST_CASE(
"FuseReLUIntoConvFloat16GpuAccTest")
1024 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
1027 TEST_CASE(
"FuseReLUIntoDWConvFloat16GpuAccTest")
1032 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
1035 TEST_CASE(
"FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1040 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
1043 TEST_CASE(
"FuseReLUIntoBatchNormFloat16GpuAccTest")
1048 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
1051 TEST_CASE(
"FuseReLUIntoMulFloat16GpuAccTest")
1056 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>,
DataType::Float16>
1059 TEST_CASE(
"FuseReLUIntoAddFloat16GpuAccTest")
1064 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>,
DataType::Float16>
1067 TEST_CASE(
"FuseReLUIntoSubFloat16GpuAccTest")
1072 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>,
DataType::Float16>
1075 TEST_CASE(
"FuseReLUIntoDivFloat16GpuAccTest")
1080 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>,
DataType::Float16>
1085 TEST_CASE(
"FuseReLUQIntoConvAsymmU8GpuAccTest")
1090 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1093 TEST_CASE(
"FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1098 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1101 TEST_CASE(
"FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1106 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1111 TEST_CASE(
"FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1115 activationDescriptor.
m_A = 6.0f;
1116 activationDescriptor.
m_B = 0.0f;
1118 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
1121 TEST_CASE(
"FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1125 activationDescriptor.
m_A = 6.0f;
1126 activationDescriptor.
m_B = 0.0f;
1128 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > ,
DataType::QAsymmS8 >
1131 TEST_CASE(
"FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1135 activationDescriptor.
m_A = 6.0f;
1136 activationDescriptor.
m_B = 0.0f;
1138 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>,
DataType::QAsymmS8>
1143 TEST_CASE(
"FuseTanHIntoConvFloat32GpuAccTest")
1148 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1151 TEST_CASE(
"FuseTanHIntoMulFloat32GpuAccTest")
1156 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1159 TEST_CASE(
"FuseTanHIntoAddFloat32GpuAccTest")
1164 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1167 TEST_CASE(
"FuseTanHIntoSubFloat32GpuAccTest")
1172 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1175 TEST_CASE(
"FuseTanHIntoDivFloat32GpuAccTest")
1180 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1185 TEST_CASE(
"FuseHardSwishIntoConvFloat32GpuAccTest")
1190 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1193 TEST_CASE(
"FuseHardSwishIntoMulFloat32GpuAccTest")
1198 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1201 TEST_CASE(
"FuseHardSwishIntoAddFloat32GpuAccTest")
1206 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1209 TEST_CASE(
"FuseHardSwishIntoSubFloat32GpuAccTest")
1214 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1217 TEST_CASE(
"FuseHardSwishIntoDivFloat32GpuAccTest")
1222 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1227 TEST_CASE(
"LayerFollowedByActivationFloat32GpuAccTest")
1230 for (
int i = 0; i != 12; ++i)
1233 activationDescriptor.
m_A = 1.0f;
1234 activationDescriptor.
m_B = -1.0f;
1237 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>,
DataType::Float32>
1238 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " << i);
1239 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>,
DataType::Float32>
1240 (activationDescriptor,
Compute::GpuAcc)),
"DepthwiseConvolution + Activation function " << i);
1241 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>,
DataType::Float32>
1242 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " << i);
1243 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>,
DataType::Float32>
1244 (activationDescriptor,
Compute::GpuAcc)),
"BatchNorm + Activation function " << i);
1245 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>,
DataType::Float32>
1246 (activationDescriptor,
Compute::GpuAcc)),
"Multiplication + Activation function " << i);
1247 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>,
DataType::Float32>
1248 (activationDescriptor,
Compute::GpuAcc)),
"Addition + Activation function " << i);
1249 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>,
DataType::Float32>
1250 (activationDescriptor,
Compute::GpuAcc)),
"Subtraction + Activation function " << i);
1251 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>,
DataType::Float32>
1252 (activationDescriptor,
Compute::GpuAcc)),
"Division + Activation function " << i);
1256 TEST_CASE(
"LayerFollowedByActivationFloat16GpuAccTest")
1259 for (
int i = 0; i != 12; ++i)
1262 activationDescriptor.
m_A = 1.0f;
1263 activationDescriptor.
m_B = -1.0f;
1266 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>,
DataType::Float16>
1267 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " << i);
1268 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>,
DataType::Float16>
1269 (activationDescriptor,
Compute::GpuAcc)),
"Depthwise + Activation function " << i);
1270 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>,
DataType::Float16>
1271 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " << i);
1272 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>,
DataType::Float16>
1273 (activationDescriptor,
Compute::GpuAcc)),
"BatchNorm + Activation function " << i);
1274 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>,
DataType::Float16>
1275 (activationDescriptor,
Compute::GpuAcc)),
"Multiplication + Activation function " << i);
1276 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>,
DataType::Float16>
1277 (activationDescriptor,
Compute::GpuAcc)),
"Addition + Activation function " << i);
1278 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>,
DataType::Float16>
1279 (activationDescriptor,
Compute::GpuAcc)),
"Subtraction + Activation function " << i);
1280 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>,
DataType::Float16>
1281 (activationDescriptor,
Compute::GpuAcc)),
"Division + Activation function " << i);
1285 TEST_CASE(
"LayerFollowedByActivationQAsymmU8GpuAccTest")
1290 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1291 (activationDescriptor,
Compute::GpuAcc, 1.f / 256.f, 0)),
"Convolution + Activation function " <<
1292 static_cast<int>(activationDescriptor.
m_Function));
1293 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1294 (activationDescriptor,
Compute::GpuAcc, 1.f / 256.f, 0)),
"FullyConnected + Activation function " <<
1295 static_cast<int>(activationDescriptor.
m_Function));
1298 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1299 (activationDescriptor,
Compute::GpuAcc, 1.f / 128.f, 128)),
"Convolution + Activation function " <<
1300 static_cast<int>(activationDescriptor.
m_Function));
1301 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1302 (activationDescriptor,
Compute::GpuAcc, 1.f / 128.f, 128)),
"FullyConnected + Activation function " <<
1303 static_cast<int>(activationDescriptor.
m_Function));
1306 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1307 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1308 static_cast<int>(activationDescriptor.
m_Function));
1309 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1310 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1311 static_cast<int>(activationDescriptor.
m_Function));
1314 activationDescriptor.
m_A = 1.0f;
1315 activationDescriptor.
m_B = -1.0f;
1316 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1317 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1318 static_cast<int>(activationDescriptor.
m_Function));
1319 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1320 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1321 static_cast<int>(activationDescriptor.
m_Function));
1324 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1325 (activationDescriptor,
Compute::GpuAcc)),
"Convolution + Activation function " <<
1326 static_cast<int>(activationDescriptor.
m_Function));
1327 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
DataType::QAsymmU8>
1328 (activationDescriptor,
Compute::GpuAcc)),
"FullyConnected + Activation function " <<
1329 static_cast<int>(activationDescriptor.
m_Function));
TEST_SUITE("TestConstTensorLayerVisitor")
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Adds a subtraction layer to the network.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
This layer represents a batch normalization operation.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
bool m_BiasEnabled
Enable/disable bias.
bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Adds a layer with no inputs and a single output, which always corresponds to the passed in constant t...
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D depthwise convolution layer to the network.
This layer represents a depthwise convolution 2d operation.
A Convolution2dDescriptor for the Convolution2dLayer.
LayerTestResult< float, 4 > DivisionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
typename ResolveTypeImpl< DT >::Type ResolveType
Main network class which provides the interface for building up a neural network. ...
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D convolution layer to the network.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Adds a division layer to the network.
LayerTestResult< float, 4 > AdditionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Adds a fully connected layer to the network.
LayerTestResult< T, 2 > FullyConnectedTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled, bool constantWeights)
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
This layer represents a fully connected operation.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
GPU Execution: OpenCL: ArmCompute.
An ActivationDescriptor for the ActivationLayer.
min(a, max(b, input)) ReLu1 & ReLu6.
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Adds a batch normalization layer to the network.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Adds an addition layer to the network.
This layer represents a subtraction operation.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< float, 4 > SubtractionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
CPU Execution: NEON: ArmCompute.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
This layer represents a division operation.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Adds a multiplication layer to the network.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual int Connect(IInputSlot &destination)=0
This layer represents a multiplication operation.
static INetworkPtr Create(NetworkOptions networkOptions={})
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
LayerTestResult< float, 4 > MultiplicationTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.