From eb2b329b761ce3206505ed8d2eab071a2f97d5e7 Mon Sep 17 00:00:00 2001 From: Nattapat Chaimanowong Date: Tue, 7 May 2019 12:02:30 +0100 Subject: IVGCVSW-2997 Refactor reference LSTM workload Signed-off-by: Nattapat Chaimanowong Change-Id: I6883f878d9f701a55153292769d2fc0530d2529e --- .../reference/workloads/RefLstmFloat32Workload.cpp | 379 --------------------- 1 file changed, 379 deletions(-) delete mode 100644 src/backends/reference/workloads/RefLstmFloat32Workload.cpp (limited to 'src/backends/reference/workloads/RefLstmFloat32Workload.cpp') diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.cpp b/src/backends/reference/workloads/RefLstmFloat32Workload.cpp deleted file mode 100644 index c697b66658..0000000000 --- a/src/backends/reference/workloads/RefLstmFloat32Workload.cpp +++ /dev/null @@ -1,379 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "RefLstmFloat32Workload.hpp" -#include "RefWorkloadUtils.hpp" -#include "Activation.hpp" - -namespace -{ - -// Helper functions ported from the Android code base -// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc - -void MatrixBatchVectorMultiplyAccumulate(const float* matrix, - uint32_t mRows, - uint32_t mCols, - const float* vector, - uint32_t nBatch, - float* outResult, - int resultStride = 1) -{ - float* resultInBatch = outResult; - for (uint32_t b = 0; b < nBatch; b++) - { - const float* matrixPtr = matrix; - for (uint32_t r = 0; r < mRows; r++) - { - const float* vectorInBatch = vector + b * mCols; - for (uint32_t c = 0; c < mCols; c++) - { - *resultInBatch += *matrixPtr++ * *vectorInBatch++; - } - resultInBatch += resultStride; - } - } -} - -void VectorBatchVectorAssign(const float* vector, - uint32_t vSize, - uint32_t nBatch, - float* outBatchVector) -{ - for (uint32_t b = 0; b < nBatch; b++) - { - memcpy(outBatchVector + b * vSize, vector, vSize * sizeof(float)); - } -} - -void VectorBatchVectorCwiseProductAccumulate(const float* vector, - uint32_t vSize, - const float* batchVector, - uint32_t nBatch, - float* outResult) -{ - for (uint32_t b = 0; b < nBatch; b++) - { - for (uint32_t v = 0; v < vSize; v++) - { - *outResult++ += vector[v] * *batchVector++; - } - } -} - -void Sub1Vector(const float* vector, - uint32_t vSize, - float* result) -{ - for (uint32_t v = 0; v < vSize; v++) - { - *result++ = 1.0f - *vector++; - } -} - -void VectorVectorCwiseProduct(const float* vector1, - const float* vector2, - uint32_t vSize, - float* outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - *outResult++ = *vector1++ * *vector2++; - } -} - -void VectorVectorCwiseProductAccumulate(const float* vector1, - const float* vector2, - uint32_t vSize, - float* outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - *outResult++ += *vector1++ * *vector2++; - } -} - -float Clip(float f, - float absLimit) -{ - float result = (absLimit < f) ? absLimit : f; - result = (-absLimit > result) ? -absLimit : result; - return result; -} - -void ClipVector(const float* vector, - uint32_t vSize, - float absLimit, - float* outResult) -{ - for (uint32_t v = 0; v < vSize; v++) - { - *outResult++ = Clip(*vector++, absLimit); - } -} - -void CopyVector(const float* vector, - uint32_t vSize, - float* outResult) -{ - memcpy(outResult, vector, vSize * sizeof(float)); -} - -void SetActivationParameters(uint32_t activation, - armnn::ActivationFunction& outArmnnActivation, - float& outA, - float& outB) -{ - switch (activation) - { - case 0: // None - outA = 0; - outB = 0; - return; - - case 1: // Relu - outArmnnActivation = armnn::ActivationFunction::ReLu; - outA = 0; - outB = 0; - return; - - case 3: // Relu6 - outArmnnActivation = armnn::ActivationFunction::BoundedReLu; - outA = 6; - outB = 0; - return; - - case 4: // Tanh - outArmnnActivation = armnn::ActivationFunction::TanH; - outA = 1; - outB = 1; - return; - - case 6: // Sigmoid - outArmnnActivation = armnn::ActivationFunction::Sigmoid; - outA = 0; - outB = 0; - return; - - default: - throw armnn::Exception("Unsupported activation function: " + std::to_string(activation)); - } -} - -std::unique_ptr AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr) -{ - if (!ptr) - { - return nullptr; - } - - return std::make_unique(*ptr); -} - -} // anonymous namespace - -namespace armnn -{ - -RefLstmFloat32Workload::RefLstmFloat32Workload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) - : Float32Workload(descriptor, info) - , m_InputToInputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToInputWeights)) - , m_InputToForgetWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToForgetWeights)) - , m_InputToCellWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToCellWeights)) - , m_InputToOutputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToOutputWeights)) - , m_RecurrentToInputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_RecurrentToInputWeights)) - , m_RecurrentToForgetWeightsTensor(AssignScopedCpuTensorHandle(descriptor.m_RecurrentToForgetWeights)) - , m_RecurrentToCellWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_RecurrentToCellWeights)) - , m_RecurrentToOutputWeightsTensor(AssignScopedCpuTensorHandle(descriptor.m_RecurrentToOutputWeights)) - , m_CellToInputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_CellToInputWeights)) - , m_CellToForgetWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_CellToForgetWeights)) - , m_CellToOutputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_CellToOutputWeights)) - , m_InputGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_InputGateBias)) - , m_ForgetGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_ForgetGateBias)) - , m_CellBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_CellBias)) - , m_OutputGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_OutputGateBias)) - , m_ProjectionWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionWeights)) - , m_ProjectionBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionBias)) -{} - -void RefLstmFloat32Workload::Execute() const -{ - // This is a porting of the LSTM::Eval() method in the Android code base - // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp - - const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); - const TensorShape& inputShape = inputInfo.GetShape(); - - float* scratchBuffer = GetOutputTensorDataFloat(0, m_Data); - float* outputStateOut = GetOutputTensorDataFloat(1, m_Data); - float* cellStateOut = GetOutputTensorDataFloat(2, m_Data); - float* output = GetOutputTensorDataFloat(3, m_Data); - - const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* outputStateIn = GetInputTensorDataFloat(1, m_Data); - const float* cellStateIn = GetInputTensorDataFloat(2, m_Data); - - const uint32_t nBatch = inputShape[0]; - const uint32_t nInput = inputShape[1]; - - const uint32_t nCell = m_InputToOutputWeightsTensor->GetShape()[0]; - const uint32_t nOutput = m_RecurrentToOutputWeightsTensor->GetShape()[1]; - - const bool useCifg = m_Data.m_Parameters.m_CifgEnabled; - const bool usePeephole = m_Data.m_Parameters.m_PeepholeEnabled; - - // Index the scratch buffers pointers to the global scratch buffer. - float* inputGateScratch = nullptr; - float* cellScratch = nullptr; - float* forgetGateScratch = nullptr; - float* outputGateScratch = nullptr; - - if (useCifg) - { - cellScratch = scratchBuffer + 0 * nCell * nBatch; - forgetGateScratch = scratchBuffer + 1 * nCell * nBatch; - outputGateScratch = scratchBuffer + 2 * nCell * nBatch; - } - else - { - inputGateScratch = scratchBuffer + 0 * nCell * nBatch; - cellScratch = scratchBuffer + 1 * nCell * nBatch; - forgetGateScratch = scratchBuffer + 2 * nCell * nBatch; - outputGateScratch = scratchBuffer + 3 * nCell * nBatch; - } - - // Initialize scratch buffers with bias. - if (!useCifg) - { - VectorBatchVectorAssign(m_InputGateBiasTensor->GetTensor(), - nCell, nBatch, inputGateScratch); - } - VectorBatchVectorAssign(m_ForgetGateBiasTensor->GetTensor(), - nCell, nBatch, forgetGateScratch); - VectorBatchVectorAssign(m_CellBiasTensor->GetTensor(), - nCell, nBatch, cellScratch); - VectorBatchVectorAssign(m_OutputGateBiasTensor->GetTensor(), - nCell, nBatch, outputGateScratch); - - // For each batch and cell: compute input_weight * input. - if (!useCifg) - { - MatrixBatchVectorMultiplyAccumulate(m_InputToInputWeightsTensor->GetTensor(), - nCell, nInput, inputData, nBatch, inputGateScratch); - } - MatrixBatchVectorMultiplyAccumulate(m_InputToForgetWeightsTensor->GetTensor(), - nCell, nInput, inputData, nBatch, forgetGateScratch); - MatrixBatchVectorMultiplyAccumulate(m_InputToCellWeightsTensor->GetTensor(), - nCell, nInput, inputData, nBatch, cellScratch); - MatrixBatchVectorMultiplyAccumulate(m_InputToOutputWeightsTensor->GetTensor(), - nCell, nInput, inputData, nBatch, outputGateScratch); - - // For each batch and cell: compute recurrent_weight * output_state. - if (!useCifg) - { - MatrixBatchVectorMultiplyAccumulate(m_RecurrentToInputWeightsTensor->GetTensor(), - nCell, nOutput, outputStateIn, nBatch, inputGateScratch); - } - MatrixBatchVectorMultiplyAccumulate(m_RecurrentToForgetWeightsTensor->GetTensor(), - nCell, nOutput, outputStateIn, nBatch, forgetGateScratch); - MatrixBatchVectorMultiplyAccumulate(m_RecurrentToCellWeightsTensor->GetTensor(), - nCell, nOutput, outputStateIn, nBatch, cellScratch); - MatrixBatchVectorMultiplyAccumulate(m_RecurrentToOutputWeightsTensor->GetTensor(), - nCell, nOutput, outputStateIn, nBatch, outputGateScratch); - - // For each batch and cell: update input gate. - if (!useCifg) - { - if (usePeephole) - { - VectorBatchVectorCwiseProductAccumulate(m_CellToInputWeightsTensor->GetTensor(), - nCell, cellStateIn, nBatch, inputGateScratch); - } - Activation(inputGateScratch, inputGateScratch, - TensorInfo({nCell, nBatch}, DataType::Float32), - ActivationFunction::Sigmoid, 0, 0); - } - - // For each batch and cell: update forget gate. - if (usePeephole) - { - VectorBatchVectorCwiseProductAccumulate(m_CellToForgetWeightsTensor->GetTensor(), nCell, - cellStateIn, nBatch, forgetGateScratch); - } - Activation(forgetGateScratch, forgetGateScratch, - TensorInfo({nCell, nBatch}, DataType::Float32), - ActivationFunction::Sigmoid, 0, 0); - - // For each batch and cell: update the cell. - VectorVectorCwiseProduct(forgetGateScratch, cellStateIn, nBatch * nCell, cellStateOut); - - ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid; - float a = 0; - float b = 0; - SetActivationParameters(m_Data.m_Parameters.m_ActivationFunc, armnnActivationFunc, a, b); - - if (m_Data.m_Parameters.m_ActivationFunc > 0) - { - Activation(cellScratch, cellScratch, - TensorInfo({nCell, nBatch}, DataType::Float32), - armnnActivationFunc, a, b); - } - if (useCifg) - { - Sub1Vector(forgetGateScratch, nBatch * nCell, forgetGateScratch); - VectorVectorCwiseProductAccumulate(cellScratch, forgetGateScratch, nBatch * nCell, cellStateOut); - } - else - { - VectorVectorCwiseProductAccumulate(cellScratch, inputGateScratch, nBatch * nCell, cellStateOut); - } - if (m_Data.m_Parameters.m_ClippingThresCell > 0.0) - { - ClipVector(cellStateOut, nBatch * nCell, m_Data.m_Parameters.m_ClippingThresCell, cellStateOut); - } - - // For each batch and cell: update the output gate. - if (usePeephole) - { - VectorBatchVectorCwiseProductAccumulate(m_CellToOutputWeightsTensor->GetTensor(), - nCell, cellStateOut, nBatch, outputGateScratch); - } - Activation(outputGateScratch, outputGateScratch, - TensorInfo({nCell, nBatch}, DataType::Float32), - ActivationFunction::Sigmoid, 0, 0); - - if (m_Data.m_Parameters.m_ActivationFunc > 0) - { - Activation(cellStateOut, cellScratch, - TensorInfo({nCell, nBatch}, DataType::Float32), - armnnActivationFunc, a, b); - } - VectorVectorCwiseProduct(outputGateScratch, cellScratch, nBatch * nCell, outputGateScratch); - - // For each batch: update the projection and output_state. - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - if (m_ProjectionBiasTensor) - { - VectorBatchVectorAssign(m_ProjectionBiasTensor->GetTensor(), - nOutput, nBatch, output); - } - MatrixBatchVectorMultiplyAccumulate(m_ProjectionWeightsTensor->GetTensor(), - nOutput, nCell, outputGateScratch, nBatch, output); - - if (m_Data.m_Parameters.m_ClippingThresProj > 0.0) - { - ClipVector(output, nBatch * nOutput, m_Data.m_Parameters.m_ClippingThresProj, output); - } - } - else - { - CopyVector(outputGateScratch, nBatch * nOutput, output); - } - - CopyVector(output, nBatch * nOutput, outputStateOut); -} - -} //namespace armnn -- cgit v1.2.1