From 3982548e928c58b1c5d876d3b14fae452cef37ca Mon Sep 17 00:00:00 2001
From: James Conroy
Date: Thu, 27 May 2021 17:44:50 +0100
Subject: IVGCVSW-6061 Add PRELU support to TF Lite delegate

* Alpha is supported both as a constant and as an input tensor.

Signed-off-by: James Conroy
Change-Id: I20f30d479b87dc5fbcf75a1ce5305d70ae9b0646
---
 delegate/CMakeLists.txt               |   3 +
 delegate/src/Prelu.hpp                | 107 +++++++++++++++++++
 delegate/src/armnn_delegate.cpp       |  11 +-
 delegate/src/test/PreluTest.cpp       | 134 +++++++++++++++++++++++
 delegate/src/test/PreluTestHelper.hpp | 193 ++++++++++++++++++++++++++++++++++
 5 files changed, 443 insertions(+), 5 deletions(-)
 create mode 100644 delegate/src/Prelu.hpp
 create mode 100644 delegate/src/test/PreluTest.cpp
 create mode 100644 delegate/src/test/PreluTestHelper.hpp

diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 84c4158b5f..c7ac4390c5 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -36,6 +36,7 @@ list(APPEND armnnDelegate_sources
         src/Pack.hpp
         src/Pad.hpp
         src/Pooling.hpp
+        src/Prelu.hpp
         src/Quantization.hpp
         src/Redefine.hpp
         src/Reduce.hpp
@@ -153,6 +154,8 @@ if(BUILD_UNIT_TESTS)
         src/test/PadTestHelper.hpp
         src/test/Pooling2dTest.cpp
         src/test/Pooling2dTestHelper.hpp
+        src/test/PreluTest.cpp
+        src/test/PreluTestHelper.hpp
         src/test/QuantizationTest.cpp
         src/test/QuantizationTestHelper.hpp
         src/test/RedefineTestHelper.hpp

diff --git a/delegate/src/Prelu.hpp b/delegate/src/Prelu.hpp
new file mode 100644
index 0000000000..9baeaf475d
--- /dev/null
+++ b/delegate/src/Prelu.hpp
@@ -0,0 +1,107 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DelegateUtils.hpp"
+
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/minimal_logging.h>
+
+namespace armnnDelegate
+{
+
+TfLiteStatus ValidatePreluOperator(DelegateData& delegateData,
+                                   TfLiteContext* tfLiteContext,
+                                   const armnn::TensorInfo& inputInfo,
+                                   const armnn::TensorInfo& alphaInfo,
+                                   const armnn::TensorInfo& outputInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsPreluSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputInfo,
+                                   alphaInfo,
+                                   outputInfo);
+    };
+
+    validateFunc(outputInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
+TfLiteStatus VisitPreluOperator(DelegateData& delegateData,
+                                TfLiteContext* tfLiteContext,
+                                TfLiteNode* tfLiteNode,
+                                int nodeIndex,
+                                int32_t operatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteAlphaTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if (!IsValid(tfLiteContext, tfLiteAlphaTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& alphaTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAlphaTensor);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    if (!delegateData.m_Network)
+    {
+        return ValidatePreluOperator(delegateData,
+                                     tfLiteContext,
+                                     inputTensorInfo,
+                                     alphaTensorInfo,
+                                     outputTensorInfo);
+    }
+
+    armnn::IConnectableLayer* preluLayer = delegateData.m_Network->AddPreluLayer();
+    ARMNN_ASSERT(preluLayer != nullptr);
+
+    bool isConstantAlpha = tflite::IsConstantTensor(&tfLiteAlphaTensor);
+
+    // Add constant layer for constant alpha
+    if (isConstantAlpha)
+    {
+        auto constAlphaTensor = armnn::ConstTensor(alphaTensorInfo, tfLiteAlphaTensor.data.data);
+
+        armnn::IConnectableLayer* constLayer = delegateData.m_Network->AddConstantLayer(constAlphaTensor);
+        ARMNN_ASSERT(constLayer != nullptr);
+
+        constLayer->GetOutputSlot(0).SetTensorInfo(alphaTensorInfo);
+        constLayer->GetOutputSlot(0).Connect(preluLayer->GetInputSlot(1));
+    }
+
+    armnn::IOutputSlot& outputSlot = preluLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    // Connect
+    return Connect(preluLayer, tfLiteNode, delegateData);
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index 7e4f5b5602..0c984ecc82 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -24,6 +24,7 @@
 #include "Pack.hpp"
 #include "Pad.hpp"
 #include "Pooling.hpp"
+#include "Prelu.hpp"
 #include "Quantization.hpp"
 #include "Redefine.hpp"
 #include "Reduce.hpp"
@@ -733,11 +734,11 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData,
                                            nodeIndex,
                                            kTfLiteBuiltinPadv2);
         case kTfLiteBuiltinPrelu:
-            return VisitActivationOperator(delegateData,
-                                           tfLiteContext,
-                                           tfLiteNode,
-                                           nodeIndex,
-                                           kTfLiteBuiltinPrelu);
+            return VisitPreluOperator(delegateData,
+                                      tfLiteContext,
+                                      tfLiteNode,
+                                      nodeIndex,
+                                      kTfLiteBuiltinPrelu);
         case kTfLiteBuiltinQuantize:
             return VisitQuantizeOperator(delegateData,
                                          tfLiteContext,
diff --git a/delegate/src/test/PreluTest.cpp b/delegate/src/test/PreluTest.cpp
new file mode 100644
index 0000000000..d9e08d20ca
--- /dev/null
+++ b/delegate/src/test/PreluTest.cpp
@@ -0,0 +1,134 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PreluTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate {
+
+void PreluFloatSimpleTest(std::vector<armnn::BackendId>& backends, bool isAlphaConst, bool isDynamicOutput = false) {
+    std::vector<int32_t> inputShape { 1, 2, 3 };
+    std::vector<int32_t> alphaShape { 1 };
+    std::vector<int32_t> outputShape { 1, 2, 3 };
+
+    if (isDynamicOutput)
+    {
+        outputShape.clear();
+    }
+
+    std::vector<float> inputData = { -14.f, 2.f, 0.f, 1.f, -5.f, 14.f };
+    std::vector<float> alphaData = { 0.5f };
+    std::vector<float> expectedOutput = { -7.f, 2.f, 0.f, 1.f, -2.5f, 14.f };
+
+    PreluTest(tflite::BuiltinOperator_PRELU,
+              ::tflite::TensorType_FLOAT32,
+              backends,
+              inputShape,
+              alphaShape,
+              outputShape,
+              inputData,
+              alphaData,
+              expectedOutput,
+              isAlphaConst);
+}
+
+TEST_SUITE("Prelu_CpuRefTests")
+{
+
+TEST_CASE ("PreluFp32SimpleConstTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    PreluFloatSimpleTest(backends, true);
+}
+
+TEST_CASE ("PreluFp32SimpleTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    PreluFloatSimpleTest(backends, false);
+}
+
+TEST_CASE ("PreluFp32SimpleConstDynamicTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    PreluFloatSimpleTest(backends, true, true);
+}
+
+TEST_CASE ("PreluFp32SimpleDynamicTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    PreluFloatSimpleTest(backends, false, true);
+}
+
+} // TEST_SUITE("Prelu_CpuRefTests")
+
+TEST_SUITE("Prelu_CpuAccTests")
+{
+
+TEST_CASE ("PreluFp32SimpleConstTest_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    PreluFloatSimpleTest(backends, true);
+}
+
+TEST_CASE ("PreluFp32SimpleTest_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    PreluFloatSimpleTest(backends, false);
+}
+
+TEST_CASE ("PreluFp32SimpleConstDynamicTest_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    PreluFloatSimpleTest(backends, true, true);
+}
+
+TEST_CASE ("PreluFp32SimpleDynamicTest_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    PreluFloatSimpleTest(backends, false, true);
+}
+
+} // TEST_SUITE("Prelu_CpuAccTests")
+
+TEST_SUITE("Prelu_GpuAccTests")
+{
+
+TEST_CASE ("PreluFp32SimpleConstTest_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+    PreluFloatSimpleTest(backends, true);
+}
+
+TEST_CASE ("PreluFp32SimpleTest_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+    PreluFloatSimpleTest(backends, false);
+}
+
+TEST_CASE ("PreluFp32SimpleConstDynamicTest_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+    PreluFloatSimpleTest(backends, true, true);
+}
+
+TEST_CASE ("PreluFp32SimpleDynamicTest_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+    PreluFloatSimpleTest(backends, false, true);
+}
+
+} // TEST_SUITE("Prelu_GpuAccTests")
+
+}
\ No newline at end of file
diff --git a/delegate/src/test/PreluTestHelper.hpp b/delegate/src/test/PreluTestHelper.hpp
new file mode 100644
index 0000000000..b6c18ccdfb
--- /dev/null
+++ b/delegate/src/test/PreluTestHelper.hpp
@@ -0,0 +1,193 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+std::vector<char> CreatePreluTfLiteModel(tflite::BuiltinOperator preluOperatorCode,
+                                         tflite::TensorType tensorType,
+                                         const std::vector<int32_t>& inputShape,
+                                         const std::vector<int32_t>& alphaShape,
+                                         const std::vector<int32_t>& outputShape,
+                                         std::vector<float>& alphaData,
+                                         bool alphaIsConstant)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector(
+        reinterpret_cast<const uint8_t*>(alphaData.data()), sizeof(float) * alphaData.size())));
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ 1.0f }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ 0 }));
+
+    auto inputTensor = CreateTensor(flatBufferBuilder,
+                                    flatBufferBuilder.CreateVector<int32_t>(inputShape.data(),
+                                                                            inputShape.size()),
+                                    tensorType,
+                                    0,
+                                    flatBufferBuilder.CreateString("input"),
+                                    quantizationParameters);
+
+    auto alphaTensor = CreateTensor(flatBufferBuilder,
+                                    flatBufferBuilder.CreateVector<int32_t>(alphaShape.data(),
+                                                                            alphaShape.size()),
+                                    tensorType,
+                                    1,
+                                    flatBufferBuilder.CreateString("alpha"),
+                                    quantizationParameters);
+
+    auto outputTensor = CreateTensor(flatBufferBuilder,
+                                     flatBufferBuilder.CreateVector<int32_t>(outputShape.data(),
+                                                                             outputShape.size()),
+                                     tensorType,
+                                     0,
+                                     flatBufferBuilder.CreateString("output"),
+                                     quantizationParameters);
+
+    std::vector<flatbuffers::Offset<Tensor>> tensors = { inputTensor, alphaTensor, outputTensor };
+
+    const std::vector<int32_t> operatorInputs{0, 1};
+    const std::vector<int32_t> operatorOutputs{2};
+    flatbuffers::Offset<Operator> preluOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()));
+
+    std::vector<int32_t> subgraphInputs{0};
+    if (!alphaIsConstant)
+    {
+        subgraphInputs.push_back(1);
+    }
+
+    const std::vector<int32_t> subgraphOutputs{2};
+    flatbuffers::Offset<SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&preluOperator, 1));
+
+    flatbuffers::Offset<flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: Prelu Operator Model");
+    flatbuffers::Offset<OperatorCode> opCode = CreateOperatorCode(flatBufferBuilder, preluOperatorCode);
+
+    flatbuffers::Offset<Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&opCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+void PreluTest(tflite::BuiltinOperator preluOperatorCode,
+               tflite::TensorType tensorType,
+               const std::vector<armnn::BackendId>& backends,
+               const std::vector<int32_t>& inputShape,
+               const std::vector<int32_t>& alphaShape,
+               std::vector<int32_t>& outputShape,
+               std::vector<float>& inputData,
+               std::vector<float>& alphaData,
+               std::vector<float>& expectedOutput,
+               bool alphaIsConstant)
+{
+    using namespace tflite;
+
+    std::vector<char> modelBuffer = CreatePreluTfLiteModel(preluOperatorCode,
+                                                           tensorType,
+                                                           inputShape,
+                                                           alphaShape,
+                                                           outputShape,
+                                                           alphaData,
+                                                           alphaIsConstant);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    CHECK(tfLiteModel != nullptr);
+
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+          (&armnnDelegateInterpreter) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter != nullptr);
+    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+          (&tfLiteInterpreter) == kTfLiteOk);
+    CHECK(tfLiteInterpreter != nullptr);
+    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                         armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data
+    armnnDelegate::FillInput(tfLiteInterpreter, 0, inputData);
+    armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputData);
+
+    // Set alpha data if not constant
+    if (!alphaIsConstant) {
+        armnnDelegate::FillInput(tfLiteInterpreter, 1, alphaData);
+        armnnDelegate::FillInput(armnnDelegateInterpreter, 1, alphaData);
+    }
+
+    // Run EnqueueWorkload
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
+
+    auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
+
+    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+    auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<float>(armnnDelegateOutputId);
+
+    for (size_t i = 0; i < expectedOutput.size(); i++)
+    {
+        CHECK(expectedOutput[i] == armnnDelegateOutputData[i]);
+        CHECK(tfLiteDelegateOutputData[i] == expectedOutput[i]);
+        CHECK(tfLiteDelegateOutputData[i] == armnnDelegateOutputData[i]);
+    }
+}
+} // anonymous namespace
\ No newline at end of file
--
cgit v1.2.1