author      Teresa Charlin <teresa.charlinreyes@arm.com>   2020-11-26 16:54:15 +0000
committer   TeresaARM <teresa.charlinreyes@arm.com>        2020-11-30 16:13:01 +0000
commit      31c39be002b9e9040b2306e2461ee228853b4ed6 (patch)
tree        e5fe131d5250796533b5b89ee5a4016e1e16afb0 /src
parent      892fafe510077fac11610a9081fc10de09ffbef3 (diff)
download    armnn-31c39be002b9e9040b2306e2461ee228853b4ed6.tar.gz
IVGCVSW-5568 Revert "IVGCVSW-5563 Fix Crash on model with FullyConnected Sigmoid Activation"
* This reverts commit be25d94aefe53f221304b1f5f344913b708f808b.
* Add Unit Test: any receiver layer + any activation layer in float and QAsymmU8.
* Tidy up fuse activation tests.

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ie059d03b85cd17eaaafe5188bb173672a1fb9ae0
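Note: the added coverage follows the pattern sketched below. For each receiver layer type and each ActivationFunction value, a small receiver + activation network is built and checked to optimize, load and execute on the target backend, whether or not the backend fuses the pair. The sketch condenses the test cases added by this patch (the loop bound of 12 matches the number of ActivationFunction values at the time of the change):

    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)                     // every ActivationFunction value
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;              // bounds used by e.g. BoundedReLu
        activationDescriptor.m_B = -1.0f;
        // True if the receiver + activation network optimizes and runs, fused or not.
        BOOST_CHECK_MESSAGE(
            (FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::CpuAcc)),
            "Convolution + Activation function " << i);
    }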
Diffstat (limited to 'src')
-rw-r--r--  src/armnn/test/optimizations/FuseActivationTests.cpp        | 476
-rw-r--r--  src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp  |  10
2 files changed, 374 insertions(+), 112 deletions(-)
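Note: the quantized test cases replace the old file-scope constants g_qScale/g_qOffset with per-test scale/offset arguments, so each QAsymmU8 test can match the quantization to the activation's output range: Sigmoid outputs lie in [0, 1], hence scale = 1/256 and offset = 0; TanH outputs lie in [-1, 1], hence scale = 1/128 and offset = 128. A minimal sketch of the affine quantization these parameters drive (it mirrors what armnnUtils::QuantizedVector computes; the helper itself is illustrative, not part of the patch):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // QAsymmU8: q = clamp(round(x / scale) + offset, 0, 255)
    uint8_t QuantizeU8(float x, float scale, int32_t offset)
    {
        const int32_t q = static_cast<int32_t>(std::lround(x / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }
    // Sigmoid: x in [0, 1],  scale 1/256, offset 0   -> q in [0, 255] (1.0 saturates)
    // TanH:    x in [-1, 1], scale 1/128, offset 128 -> q in [0, 255] (+1.0 saturates)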
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index 0e855977a0..f28abff1e7 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -19,10 +19,8 @@ using namespace armnn;
BOOST_AUTO_TEST_SUITE(Optimizer)
-namespace
+namespace armnn
{
-const float g_qScale = 1.0f;
-const int32_t g_qOffset = 0;
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
@@ -40,10 +38,10 @@ std::vector<T> GetVector(unsigned int size, float initial, float increment)
return vector;
}
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct Convolution2dTest
{
- using LayerType = armnn::Convolution2dLayer;
+ using LayerType = Convolution2dLayer;
static std::string GetReceiverLayerName() { return "Convolution2d"; };
static const bool isElementWise = false;
@@ -55,7 +53,9 @@ struct Convolution2dTest
constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
Convolution2dDescriptor descriptor;
descriptor.m_BiasEnabled = false;
@@ -67,8 +67,8 @@ struct Convolution2dTest
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
- std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
- TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
+ std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+ TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
ConstTensor weights(weightsInfo, weightsVector);
Optional<ConstTensor> optionalBias;
@@ -76,11 +76,11 @@ struct Convolution2dTest
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-struct DepthwiseConvolution2dTest
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
+struct DWConvolution2dTest
{
public:
- using LayerType = armnn::DepthwiseConvolution2dLayer;
+ using LayerType = DepthwiseConvolution2dLayer;
static std::string GetReceiverLayerName() { return "DepthwiseConvolution2d"; };
static const bool isElementWise = false;
@@ -92,7 +92,9 @@ public:
constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut;
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
DepthwiseConvolution2dDescriptor descriptor;
descriptor.m_BiasEnabled = false;
@@ -104,8 +106,8 @@ public:
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
- std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
- TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
+ std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+ TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
ConstTensor weights(weightsInfo, weightsVector);
Optional<ConstTensor> optionalBias;
@@ -113,11 +115,11 @@ public:
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct FullyConnectedTest
{
public:
- using LayerType = armnn::FullyConnectedLayer;
+ using LayerType = FullyConnectedLayer;
static std::string GetReceiverLayerName() { return "FullyConnected"; };
static const bool isElementWise = false;
@@ -129,7 +131,9 @@ public:
constexpr static const unsigned int outputSize = 6; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
FullyConnectedDescriptor descriptor;
descriptor.m_BiasEnabled = false;
@@ -137,8 +141,8 @@ public:
std::vector<float> weightsData = { 1, 2, 3, 4, 5,
6, 7, 8, 9, 10,
11, 12, 13, 14, 15};
- std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
- TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
+ std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+ TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
ConstTensor weights(weightsInfo, weightsVector);
Optional<ConstTensor> optionalBias;
@@ -146,11 +150,11 @@ public:
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct BatchNormTest
{
public:
- using LayerType = armnn::BatchNormalizationLayer;
+ using LayerType = BatchNormalizationLayer;
static std::string GetReceiverLayerName() { return "BatchNorm"; };
static const bool isElementWise = false;
@@ -161,8 +165,13 @@ public:
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+
BatchNormalizationDescriptor descriptor;
descriptor.m_DataLayout = DataLayout::NHWC;
@@ -181,10 +190,10 @@ public:
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct MultiplicationTest
{
- using LayerType = armnn::MultiplicationLayer;
+ using LayerType = MultiplicationLayer;
static std::string GetReceiverLayerName() { return "Multiplication"; };
static const bool isElementWise = true;
@@ -195,16 +204,21 @@ struct MultiplicationTest
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+
return network->AddMultiplicationLayer(name);
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct AdditionTest
{
- using LayerType = armnn::AdditionLayer;
+ using LayerType = AdditionLayer;
static std::string GetReceiverLayerName() { return "Addition"; };
static const bool isElementWise = true;
@@ -215,16 +229,21 @@ struct AdditionTest
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+
return network->AddAdditionLayer(name);
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct SubtractionTest
{
- using LayerType = armnn::SubtractionLayer;
+ using LayerType = SubtractionLayer;
static std::string GetReceiverLayerName() { return "Subtraction"; };
static const bool isElementWise = true;
@@ -235,16 +254,21 @@ struct SubtractionTest
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+
return network->AddSubtractionLayer(name);
}
};
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DivisionTest
{
- using LayerType = armnn::DivisionLayer;
+ using LayerType = DivisionLayer;
static std::string GetReceiverLayerName() { return "Division"; };
static const bool isElementWise = true;
@@ -255,17 +279,21 @@ struct DivisionTest
constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
static IConnectableLayer* AddReceiverLayer(INetwork* network,
- const char* name)
+ const char* name,
+ float scale = 1.f,
+ int32_t offset = 0)
{
+ IgnoreUnused(scale);
+ IgnoreUnused(offset);
+
return network->AddDivisionLayer(name);
}
};
-} // namespace
-
template<typename LayerTest,
- armnn::DataType ArmnnType>
-INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing)
+ DataType ArmnnType>
+INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
+ float scale, int32_t offset)
{
// Create a network
INetworkPtr network = INetwork::Create();
@@ -273,7 +301,9 @@ INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool prevent
IConnectableLayer* inputLayer = network->AddInputLayer(0);
IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
- "receiverLayer");
+ "receiverLayer",
+ scale,
+ offset);
IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
"activation");
@@ -282,8 +312,8 @@ INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool prevent
IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):nullptr;
// Define layers information
- TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, g_qScale, g_qOffset);
- TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, g_qScale, g_qOffset);
+ TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
+ TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
// Set layer information
inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
@@ -308,15 +338,15 @@ INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool prevent
}
template<typename LayerTest,
- armnn::DataType ArmnnType,
+ DataType ArmnnType,
typename LayerType = typename LayerTest::LayerType,
- typename T = armnn::ResolveType<ArmnnType>>
-void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, armnn::Compute
-backendId)
+ typename T = ResolveType<ArmnnType>>
+void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
+ float scale = 1.f, int32_t offset=0)
{
// FIRST NETWORK: Fused
// Construct ArmNN network
- INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false);
+ INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
// Create ArmNN runtime
IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -326,7 +356,7 @@ backendId)
Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
- auto checkFusedConv2d = [](const armnn::Layer* const layer)->bool {
+ auto checkFusedConv2d = [](const Layer* const layer)->bool {
return IsLayerOfType<LayerType>(layer) &&
(layer->GetNameStr() == "fused-activation-into-receiverLayer");
};
@@ -344,7 +374,7 @@ backendId)
//Creates structures for inputs and outputs.
std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
- std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, g_qScale, g_qOffset);
+ std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
std::vector<T> outputDataFused(LayerTest::outputSize);
InputTensors inputTensorsFused{
@@ -357,7 +387,7 @@ backendId)
// SECOND NETWORK: NotFused
// Construct ArmNN network
- INetworkPtr networkNotFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, true);
+ INetworkPtr networkNotFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);
// Create ArmNN runtime
IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -370,18 +400,18 @@ backendId)
BOOST_CHECK(5 == graphNotFused.GetNumLayers());
BOOST_TEST(CheckSequence(graphNotFused.cbegin(),
graphNotFused.cend(),
- &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<InputLayer>,
&IsLayerOfType<LayerType>,
- &IsLayerOfType<armnn::ActivationLayer>,
- &IsLayerOfType<armnn::OutputLayer>,
- &IsLayerOfType<armnn::OutputLayer>));
+ &IsLayerOfType<ActivationLayer>,
+ &IsLayerOfType<OutputLayer>,
+ &IsLayerOfType<OutputLayer>));
// Load network into runtime
NetworkId networkIdentifierNotFused;
BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
//Creates structures for inputs and outputs.
- std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, g_qScale, g_qOffset);
+ std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
std::vector<T> outputDataNotFused(LayerTest::outputSize);
std::vector<T> outputData2NotFused(LayerTest::outputSize);
@@ -402,6 +432,58 @@ backendId)
}
}
+template<typename LayerTest,
+ DataType ArmnnType,
+ typename LayerType = typename LayerTest::LayerType,
+ typename T = ResolveType<ArmnnType>>
+bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
+ float scale = 1.f, int32_t offset = 0)
+{
+ bool success;
+ try
+ {
+ // Construct ArmNN network
+ INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
+
+ // Create ArmNN runtime
+ IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
+
+ // Optimise ArmNN network
+ IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
+
+ Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
+
+ // Load network into runtime
+ NetworkId networkIdentifier;
+ BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
+
+ //Creates structures for inputs and outputs.
+ std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
+ std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
+ std::vector<T> outputDataFused(LayerTest::outputSize);
+
+ InputTensors inputTensorsFused{
+ {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
+ OutputTensors outputTensorsFused{
+ {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
+
+ // Execute network
+ run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
+
+ success = true;
+ }
+ catch (const std::exception& e)
+ {
+ std::cerr << e.what() << std::endl;
+ success = false;
+ }
+
+ return success;
+}
+
+} // namespace armnn
+
+using namespace armnn;
#if defined(ARMCOMPUTENEON_ENABLED)
// ReLu fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32CpuAccTest)
@@ -410,15 +492,15 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32CpuAccTest)
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
- FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32CpuAccTest)
{
@@ -426,7 +508,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32CpuAccTest)
{
@@ -434,7 +516,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// BoundedReLu fused into Receiver Layers Float32
@@ -446,7 +528,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvFloat32CpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32CpuAccTest)
{
@@ -455,8 +537,8 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32CpuAccTest)
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
- FuseActivationIntoPreviousLayerTest < DepthwiseConvolution2dTest < DataType::Float32 > , DataType::Float32 >
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::Float32 > , DataType::Float32 >
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest)
{
@@ -466,7 +548,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32CpuAccTest)
{
@@ -476,7 +558,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32CpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// ReLU fused into Receiver Layers QAsymmU8
@@ -486,15 +568,15 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoConvQAsymmU8CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvQAsymmU8CpuAccTest)
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
- FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest)
{
@@ -502,7 +584,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// HardSwish fused into Receiver Layers Float32
@@ -512,7 +594,7 @@ BOOST_AUTO_TEST_CASE(FuseHardSwishIntoConvFloat32CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
// TanH fused into Receiver Layers Float32
@@ -522,7 +604,91 @@ BOOST_AUTO_TEST_CASE(FuseTanHIntoConvFloat32CpuAccTest)
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+ (activationDescriptor, 0.0001f, Compute::CpuAcc);
+}
+
+// Test that all receiver layers followed by all activation layers work, either fused or not fused
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat32CpuAccTest)
+{
+ ActivationDescriptor activationDescriptor;
+ for (int i = 0; i != 12; ++i)
+ {
+ activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+ activationDescriptor.m_A = 1.0f;
+ activationDescriptor.m_B = -1.0f;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
+ }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat16CpuAccTest)
+{
+ ActivationDescriptor activationDescriptor;
+ for (int i = 0; i != 12; ++i)
+ {
+ activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+ activationDescriptor.m_A = 1.0f;
+ activationDescriptor.m_B = -1.0f;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
+ }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationQAsymmU8CpuAccTest)
+{
+ ActivationDescriptor activationDescriptor;
+
+ activationDescriptor.m_Function = ActivationFunction::Sigmoid;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::TanH;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::ReLu;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
+ activationDescriptor.m_A = 1.0f;
+ activationDescriptor.m_B = -1.0f;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::HardSwish;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
}
#endif
@@ -534,15 +700,15 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32GpuAccTest)
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
- FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32GpuAccTest)
{
@@ -550,7 +716,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32GpuAccTest)
{
@@ -558,7 +724,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoMulFloat32GpuAccTest)
{
@@ -566,7 +732,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoMulFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoAddFloat32GpuAccTest)
{
@@ -574,7 +740,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoAddFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoSubFloat32GpuAccTest)
{
@@ -582,7 +748,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoSubFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDivFloat32GpuAccTest)
{
@@ -590,7 +756,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUIntoDivFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// BoundedReLu fused into Receiver Layers Float32
@@ -602,7 +768,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32GpuAccTest)
{
@@ -611,8 +777,8 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32GpuAccTest)
activationDescriptor.m_A = 1.0f;
activationDescriptor.m_B = -1.0f;
- FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest)
{
@@ -622,7 +788,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32GpuAccTest)
{
@@ -632,7 +798,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoMulFloat32GpuAccTest)
{
@@ -642,7 +808,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoMulFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoAddFloat32GpuAccTest)
{
@@ -652,7 +818,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoAddFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoSubFloat32GpuAccTest)
{
@@ -662,7 +828,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoSubFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDivFloat32GpuAccTest)
{
@@ -672,7 +838,7 @@ BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDivFloat32GpuAccTest)
activationDescriptor.m_B = -1.0f;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// ReLU fused into Receiver Layers QAsymmU8
@@ -682,15 +848,15 @@ BOOST_AUTO_TEST_CASE(FuseReLUQIntoConvAsymmU8GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUQIntoDWConvAsymmU8GpuAccTest)
{
ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;
- FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest)
{
@@ -698,7 +864,7 @@ BOOST_AUTO_TEST_CASE(FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::ReLu;
FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// HardSwish fused into Receiver Layers Float32
@@ -708,7 +874,7 @@ BOOST_AUTO_TEST_CASE(FuseHardSwishIntoConvFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoMulFloat32GpuAccTest)
{
@@ -716,7 +882,7 @@ BOOST_AUTO_TEST_CASE(FuseHardSwishIntoMulFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoAddFloat32GpuAccTest)
{
@@ -724,7 +890,7 @@ BOOST_AUTO_TEST_CASE(FuseHardSwishIntoAddFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoSubFloat32GpuAccTest)
{
@@ -732,7 +898,7 @@ BOOST_AUTO_TEST_CASE(FuseHardSwishIntoSubFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoDivFloat32GpuAccTest)
{
@@ -740,7 +906,7 @@ BOOST_AUTO_TEST_CASE(FuseHardSwishIntoDivFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::HardSwish;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
// TanH fused into Receiver Layers Float32
@@ -750,7 +916,7 @@ BOOST_AUTO_TEST_CASE(FuseTanHIntoConvFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoMulFloat32GpuAccTest)
{
@@ -758,7 +924,7 @@ BOOST_AUTO_TEST_CASE(FuseTanHIntoMulFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoAddFloat32GpuAccTest)
{
@@ -766,7 +932,7 @@ BOOST_AUTO_TEST_CASE(FuseTanHIntoAddFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoSubFloat32GpuAccTest)
{
@@ -774,7 +940,7 @@ BOOST_AUTO_TEST_CASE(FuseTanHIntoSubFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoDivFloat32GpuAccTest)
{
@@ -782,7 +948,113 @@ BOOST_AUTO_TEST_CASE(FuseTanHIntoDivFloat32GpuAccTest)
activationDescriptor.m_Function = ActivationFunction::TanH;
FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
- (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+ (activationDescriptor, 0.0001f, Compute::GpuAcc);
+}
+
+// Test that all receiver layers followed by all activation layers work, either fused or not fused
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat32GpuAccTest)
+{
+ ActivationDescriptor activationDescriptor;
+ for (int i = 0; i != 12; ++i)
+ {
+ activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+ activationDescriptor.m_A = 1.0f;
+ activationDescriptor.m_B = -1.0f;
+ if (activationDescriptor.m_Function != ActivationFunction::Elu)
+ {
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
+ (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
+ }
+ }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat16GpuAccTest)
+{
+ ActivationDescriptor activationDescriptor;
+ for (int i = 0; i != 12; ++i)
+ {
+ activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+ activationDescriptor.m_A = 1.0f;
+ activationDescriptor.m_B = -1.0f;
+ if (activationDescriptor.m_Function != ActivationFunction::Elu)
+ {
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
+ (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
+ }
+ }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationQAsymmU8GpuAccTest)
+{
+ ActivationDescriptor activationDescriptor;
+
+ activationDescriptor.m_Function = ActivationFunction::Sigmoid;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::TanH;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::ReLu;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
+ activationDescriptor.m_A = 1.0f;
+ activationDescriptor.m_B = -1.0f;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+
+ activationDescriptor.m_Function = ActivationFunction::HardSwish;
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
+ BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+ (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
+ static_cast<int>(activationDescriptor.m_Function));
}
#endif
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 39fb4c959b..31489a0c32 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -27,16 +27,6 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const FullyConnectedDescriptor& descriptor,
const ActivationDescriptor* activationDescriptor)
{
- if (activationDescriptor)
- {
- std::vector<ActivationFunction> activations = {ActivationFunction::ReLu, ActivationFunction::BoundedReLu};
- if (std::find(activations.begin(), activations.end(), activationDescriptor->m_Function) == activations.end())
- {
- return arm_compute::Status{
- arm_compute::ErrorCode::RUNTIME_ERROR, "NeonFullyConnectedWorkload :Unsupported Activation Function"};
- }
- }
-
const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
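Note: with the whitelist above removed, NeonFullyConnectedWorkloadValidate no longer rejects fused activations other than ReLu/BoundedReLu itself; that decision moves back to Arm Compute Library's own validation. A hedged sketch of the remaining flow (the helper name and call shape follow ArmNN's aclCommon utilities and ACL's NEFullyConnectedLayer, and are an assumption here, not a quote of the patched file):

    // Translate the fused activation (if any) into ACL's descriptor
    // (ConvertActivationDescriptorToAclActivationLayerInfo is assumed from aclCommon).
    const arm_compute::ActivationLayerInfo activationInfo =
        ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);

    arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo;
    fullyConnectedLayerInfo.transpose_weights = descriptor.m_TransposeWeightMatrix;
    fullyConnectedLayerInfo.activation_info   = activationInfo;

    // Let ACL decide whether the fused combination is supported
    // (aclBiasesPtr may be nullptr when descriptor.m_BiasEnabled is false).
    return arm_compute::NEFullyConnectedLayer::validate(&aclInput, &aclWeights, aclBiasesPtr,
                                                        &aclOutput, fullyConnectedLayerInfo);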