about | summary | refs | log | tree | commit | diff
path: root/src/backends/reference/workloads/RefLstmFloat32Workload.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/reference/workloads/RefLstmFloat32Workload.cpp')
-rw-r--r-- src/backends/reference/workloads/RefLstmFloat32Workload.cpp 379
1 files changed, 0 insertions, 379 deletions
diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.cpp b/src/backends/reference/workloads/RefLstmFloat32Workload.cpp
deleted file mode 100644
index c697b66658..0000000000
--- a/src/backends/reference/workloads/RefLstmFloat32Workload.cpp
+++ /dev/null
@@ -1,379 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefLstmFloat32Workload.hpp"
-#include "RefWorkloadUtils.hpp"
-#include "Activation.hpp"
-
-namespace
-{
-
-// Helper functions ported from the Android code base
-// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
-
// Adds, for every batch, the product of a row-major matrix with that batch's vector
// onto an existing result buffer.
//
// matrix       - mRows * mCols values laid out row after row.
// vector       - nBatch consecutive vectors of mCols values each.
// outResult    - accumulator: one slot per (batch, row) pair, visited batch-major.
//                Slots are incremented, not overwritten, so they must be initialised by the caller.
// resultStride - distance in floats between two consecutive result slots.
void MatrixBatchVectorMultiplyAccumulate(const float* matrix,
                                         uint32_t mRows,
                                         uint32_t mCols,
                                         const float* vector,
                                         uint32_t nBatch,
                                         float* outResult,
                                         int resultStride = 1)
{
    float* slot = outResult;
    for (uint32_t batch = 0; batch < nBatch; ++batch)
    {
        const float* batchVector = vector + batch * mCols;
        const float* row = matrix;
        for (uint32_t rowIndex = 0; rowIndex < mRows; ++rowIndex)
        {
            // Accumulate straight into the slot so the summation order is term by term.
            for (uint32_t col = 0; col < mCols; ++col)
            {
                *slot += row[col] * batchVector[col];
            }
            row += mCols;
            slot += resultStride;
        }
    }
}
-
// Writes nBatch back-to-back copies of a vSize-element vector into outBatchVector.
void VectorBatchVectorAssign(const float* vector,
                             uint32_t vSize,
                             uint32_t nBatch,
                             float* outBatchVector)
{
    float* destination = outBatchVector;
    for (uint32_t remaining = nBatch; remaining > 0; --remaining)
    {
        memcpy(destination, vector, vSize * sizeof(float));
        destination += vSize;
    }
}
-
// For each of the nBatch vectors in batchVector, multiplies it element by element with
// `vector` and adds the products onto the matching elements of outResult.
void VectorBatchVectorCwiseProductAccumulate(const float* vector,
                                             uint32_t vSize,
                                             const float* batchVector,
                                             uint32_t nBatch,
                                             float* outResult)
{
    const float* batchIn = batchVector;
    float* batchOut = outResult;
    for (uint32_t batch = 0; batch < nBatch; ++batch)
    {
        for (uint32_t i = 0; i < vSize; ++i)
        {
            batchOut[i] += vector[i] * batchIn[i];
        }
        batchIn += vSize;
        batchOut += vSize;
    }
}
-
// Stores (1 - vector[i]) into result[i] for the first vSize elements.
// Each element is read before it is written, so result may be the same buffer as vector.
void Sub1Vector(const float* vector,
                uint32_t vSize,
                float* result)
{
    for (uint32_t i = 0; i < vSize; ++i)
    {
        result[i] = 1.0f - vector[i];
    }
}
-
// Overwrites outResult[i] with vector1[i] * vector2[i] for the first vSize elements.
// Both operands are read before the write, so outResult may alias either input.
void VectorVectorCwiseProduct(const float* vector1,
                              const float* vector2,
                              uint32_t vSize,
                              float* outResult)
{
    for (uint32_t i = 0; i < vSize; ++i)
    {
        outResult[i] = vector1[i] * vector2[i];
    }
}
-
// Adds vector1[i] * vector2[i] onto outResult[i] for the first vSize elements.
// outResult is incremented, so it must hold its starting values on entry.
void VectorVectorCwiseProductAccumulate(const float* vector1,
                                        const float* vector2,
                                        uint32_t vSize,
                                        float* outResult)
{
    for (uint32_t i = 0; i < vSize; ++i)
    {
        outResult[i] += vector1[i] * vector2[i];
    }
}
-
// Limits f to the range [-absLimit, absLimit].
// The upper bound is applied first, then the lower bound is applied to that result.
float Clip(float f,
           float absLimit)
{
    float bounded = f;
    if (absLimit < f)
    {
        bounded = absLimit;
    }
    if (-absLimit > bounded)
    {
        bounded = -absLimit;
    }
    return bounded;
}
-
-void ClipVector(const float* vector,
- uint32_t vSize,
- float absLimit,
- float* outResult)
-{
- for (uint32_t v = 0; v < vSize; v++)
- {
- *outResult++ = Clip(*vector++, absLimit);
- }
-}
-
// Copies the first vSize floats of vector into outResult with a single memcpy.
void CopyVector(const float* vector,
                uint32_t vSize,
                float* outResult)
{
    const size_t byteCount = vSize * sizeof(float);
    memcpy(outResult, vector, byteCount);
}
-
-void SetActivationParameters(uint32_t activation,
- armnn::ActivationFunction& outArmnnActivation,
- float& outA,
- float& outB)
-{
- switch (activation)
- {
- case 0: // None
- outA = 0;
- outB = 0;
- return;
-
- case 1: // Relu
- outArmnnActivation = armnn::ActivationFunction::ReLu;
- outA = 0;
- outB = 0;
- return;
-
- case 3: // Relu6
- outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
- outA = 6;
- outB = 0;
- return;
-
- case 4: // Tanh
- outArmnnActivation = armnn::ActivationFunction::TanH;
- outA = 1;
- outB = 1;
- return;
-
- case 6: // Sigmoid
- outArmnnActivation = armnn::ActivationFunction::Sigmoid;
- outA = 0;
- outB = 0;
- return;
-
- default:
- throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
- }
-}
-
-std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
-{
- if (!ptr)
- {
- return nullptr;
- }
-
- return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
-}
-
-} // anonymous namespace
-
-namespace armnn
-{
-
-RefLstmFloat32Workload::RefLstmFloat32Workload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
- : Float32Workload<LstmQueueDescriptor>(descriptor, info)
- , m_InputToInputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToInputWeights))
- , m_InputToForgetWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToForgetWeights))
- , m_InputToCellWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToCellWeights))
- , m_InputToOutputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_InputToOutputWeights))
- , m_RecurrentToInputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_RecurrentToInputWeights))
- , m_RecurrentToForgetWeightsTensor(AssignScopedCpuTensorHandle(descriptor.m_RecurrentToForgetWeights))
- , m_RecurrentToCellWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_RecurrentToCellWeights))
- , m_RecurrentToOutputWeightsTensor(AssignScopedCpuTensorHandle(descriptor.m_RecurrentToOutputWeights))
- , m_CellToInputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_CellToInputWeights))
- , m_CellToForgetWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_CellToForgetWeights))
- , m_CellToOutputWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_CellToOutputWeights))
- , m_InputGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_InputGateBias))
- , m_ForgetGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_ForgetGateBias))
- , m_CellBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_CellBias))
- , m_OutputGateBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_OutputGateBias))
- , m_ProjectionWeightsTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionWeights))
- , m_ProjectionBiasTensor (AssignScopedCpuTensorHandle(descriptor.m_ProjectionBias))
-{}
-
-void RefLstmFloat32Workload::Execute() const
-{
- // This is a porting of the LSTM::Eval() method in the Android code base
- // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
-
- const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
- const TensorShape& inputShape = inputInfo.GetShape();
-
- float* scratchBuffer = GetOutputTensorDataFloat(0, m_Data);
- float* outputStateOut = GetOutputTensorDataFloat(1, m_Data);
- float* cellStateOut = GetOutputTensorDataFloat(2, m_Data);
- float* output = GetOutputTensorDataFloat(3, m_Data);
-
- const float* inputData = GetInputTensorDataFloat(0, m_Data);
- const float* outputStateIn = GetInputTensorDataFloat(1, m_Data);
- const float* cellStateIn = GetInputTensorDataFloat(2, m_Data);
-
- const uint32_t nBatch = inputShape[0];
- const uint32_t nInput = inputShape[1];
-
- const uint32_t nCell = m_InputToOutputWeightsTensor->GetShape()[0];
- const uint32_t nOutput = m_RecurrentToOutputWeightsTensor->GetShape()[1];
-
- const bool useCifg = m_Data.m_Parameters.m_CifgEnabled;
- const bool usePeephole = m_Data.m_Parameters.m_PeepholeEnabled;
-
- // Index the scratch buffers pointers to the global scratch buffer.
- float* inputGateScratch = nullptr;
- float* cellScratch = nullptr;
- float* forgetGateScratch = nullptr;
- float* outputGateScratch = nullptr;
-
- if (useCifg)
- {
- cellScratch = scratchBuffer + 0 * nCell * nBatch;
- forgetGateScratch = scratchBuffer + 1 * nCell * nBatch;
- outputGateScratch = scratchBuffer + 2 * nCell * nBatch;
- }
- else
- {
- inputGateScratch = scratchBuffer + 0 * nCell * nBatch;
- cellScratch = scratchBuffer + 1 * nCell * nBatch;
- forgetGateScratch = scratchBuffer + 2 * nCell * nBatch;
- outputGateScratch = scratchBuffer + 3 * nCell * nBatch;
- }
-
- // Initialize scratch buffers with bias.
- if (!useCifg)
- {
- VectorBatchVectorAssign(m_InputGateBiasTensor->GetTensor<float>(),
- nCell, nBatch, inputGateScratch);
- }
- VectorBatchVectorAssign(m_ForgetGateBiasTensor->GetTensor<float>(),
- nCell, nBatch, forgetGateScratch);
- VectorBatchVectorAssign(m_CellBiasTensor->GetTensor<float>(),
- nCell, nBatch, cellScratch);
- VectorBatchVectorAssign(m_OutputGateBiasTensor->GetTensor<float>(),
- nCell, nBatch, outputGateScratch);
-
- // For each batch and cell: compute input_weight * input.
- if (!useCifg)
- {
- MatrixBatchVectorMultiplyAccumulate(m_InputToInputWeightsTensor->GetTensor<float>(),
- nCell, nInput, inputData, nBatch, inputGateScratch);
- }
- MatrixBatchVectorMultiplyAccumulate(m_InputToForgetWeightsTensor->GetTensor<float>(),
- nCell, nInput, inputData, nBatch, forgetGateScratch);
- MatrixBatchVectorMultiplyAccumulate(m_InputToCellWeightsTensor->GetTensor<float>(),
- nCell, nInput, inputData, nBatch, cellScratch);
- MatrixBatchVectorMultiplyAccumulate(m_InputToOutputWeightsTensor->GetTensor<float>(),
- nCell, nInput, inputData, nBatch, outputGateScratch);
-
- // For each batch and cell: compute recurrent_weight * output_state.
- if (!useCifg)
- {
- MatrixBatchVectorMultiplyAccumulate(m_RecurrentToInputWeightsTensor->GetTensor<float>(),
- nCell, nOutput, outputStateIn, nBatch, inputGateScratch);
- }
- MatrixBatchVectorMultiplyAccumulate(m_RecurrentToForgetWeightsTensor->GetTensor<float>(),
- nCell, nOutput, outputStateIn, nBatch, forgetGateScratch);
- MatrixBatchVectorMultiplyAccumulate(m_RecurrentToCellWeightsTensor->GetTensor<float>(),
- nCell, nOutput, outputStateIn, nBatch, cellScratch);
- MatrixBatchVectorMultiplyAccumulate(m_RecurrentToOutputWeightsTensor->GetTensor<float>(),
- nCell, nOutput, outputStateIn, nBatch, outputGateScratch);
-
- // For each batch and cell: update input gate.
- if (!useCifg)
- {
- if (usePeephole)
- {
- VectorBatchVectorCwiseProductAccumulate(m_CellToInputWeightsTensor->GetTensor<float>(),
- nCell, cellStateIn, nBatch, inputGateScratch);
- }
- Activation(inputGateScratch, inputGateScratch,
- TensorInfo({nCell, nBatch}, DataType::Float32),
- ActivationFunction::Sigmoid, 0, 0);
- }
-
- // For each batch and cell: update forget gate.
- if (usePeephole)
- {
- VectorBatchVectorCwiseProductAccumulate(m_CellToForgetWeightsTensor->GetTensor<float>(), nCell,
- cellStateIn, nBatch, forgetGateScratch);
- }
- Activation(forgetGateScratch, forgetGateScratch,
- TensorInfo({nCell, nBatch}, DataType::Float32),
- ActivationFunction::Sigmoid, 0, 0);
-
- // For each batch and cell: update the cell.
- VectorVectorCwiseProduct(forgetGateScratch, cellStateIn, nBatch * nCell, cellStateOut);
-
- ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
- float a = 0;
- float b = 0;
- SetActivationParameters(m_Data.m_Parameters.m_ActivationFunc, armnnActivationFunc, a, b);
-
- if (m_Data.m_Parameters.m_ActivationFunc > 0)
- {
- Activation(cellScratch, cellScratch,
- TensorInfo({nCell, nBatch}, DataType::Float32),
- armnnActivationFunc, a, b);
- }
- if (useCifg)
- {
- Sub1Vector(forgetGateScratch, nBatch * nCell, forgetGateScratch);
- VectorVectorCwiseProductAccumulate(cellScratch, forgetGateScratch, nBatch * nCell, cellStateOut);
- }
- else
- {
- VectorVectorCwiseProductAccumulate(cellScratch, inputGateScratch, nBatch * nCell, cellStateOut);
- }
- if (m_Data.m_Parameters.m_ClippingThresCell > 0.0)
- {
- ClipVector(cellStateOut, nBatch * nCell, m_Data.m_Parameters.m_ClippingThresCell, cellStateOut);
- }
-
- // For each batch and cell: update the output gate.
- if (usePeephole)
- {
- VectorBatchVectorCwiseProductAccumulate(m_CellToOutputWeightsTensor->GetTensor<float>(),
- nCell, cellStateOut, nBatch, outputGateScratch);
- }
- Activation(outputGateScratch, outputGateScratch,
- TensorInfo({nCell, nBatch}, DataType::Float32),
- ActivationFunction::Sigmoid, 0, 0);
-
- if (m_Data.m_Parameters.m_ActivationFunc > 0)
- {
- Activation(cellStateOut, cellScratch,
- TensorInfo({nCell, nBatch}, DataType::Float32),
- armnnActivationFunc, a, b);
- }
- VectorVectorCwiseProduct(outputGateScratch, cellScratch, nBatch * nCell, outputGateScratch);
-
- // For each batch: update the projection and output_state.
- if (m_Data.m_Parameters.m_ProjectionEnabled)
- {
- if (m_ProjectionBiasTensor)
- {
- VectorBatchVectorAssign(m_ProjectionBiasTensor->GetTensor<float>(),
- nOutput, nBatch, output);
- }
- MatrixBatchVectorMultiplyAccumulate(m_ProjectionWeightsTensor->GetTensor<float>(),
- nOutput, nCell, outputGateScratch, nBatch, output);
-
- if (m_Data.m_Parameters.m_ClippingThresProj > 0.0)
- {
- ClipVector(output, nBatch * nOutput, m_Data.m_Parameters.m_ClippingThresProj, output);
- }
- }
- else
- {
- CopyVector(outputGateScratch, nBatch * nOutput, output);
- }
-
- CopyVector(output, nBatch * nOutput, outputStateOut);
-}
-
-} //namespace armnn