path: root/src/backends/reference/workloads/LstmUtils.cpp
author    Jan Eilers <jan.eilers@arm.com>    2019-06-26 13:10:09 +0100
committer Jan Eilers <jan.eilers@arm.com>    2019-07-02 09:59:37 +0000
commit    38e05bd2836b1b65b440330a9c283038ba4192c3 (patch)
tree      c232f71ce6a101c70ed65e046678f7b22593dbe4 /src/backends/reference/workloads/LstmUtils.cpp
parent    d0c0cc3e27f1ada9df167d3b9ff248be432d16e1 (diff)
download  armnn-38e05bd2836b1b65b440330a9c283038ba4192c3.tar.gz
IVGCVSW-3236 Extend Ref LSTM with layer normalization support
* Add descriptor values
* Update lstm queue descriptor validate function
* Update lstm workload
* Update isLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport
* Update lstm layer
* Add unit tests

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
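For context, the layer normalization this patch wires into the reference LSTM is applied per gate: each batch row of a gate's pre-activation is normalized to zero mean and unit variance, then scaled by that gate's layer-norm weights. A minimal sketch on plain arrays (the function name and the lnWeights scaling step are illustrative, not taken from this patch):

#include <cmath>
#include <cstdint>

void LayerNormGate(const float* input, const float* lnWeights,
                   uint32_t vSize, float epsilon, float* output)
{
    float sum   = 0.0f;
    float sumSq = 0.0f;
    for (uint32_t i = 0; i < vSize; ++i)
    {
        sum   += input[i];
        sumSq += input[i] * input[i];
    }
    const float mean      = sum / static_cast<float>(vSize);
    const float variance  = sumSq / static_cast<float>(vSize) - mean * mean;
    // Fall back to epsilon when the variance is exactly zero, as the patch does below.
    const float stddevInv = 1.0f / std::sqrt(variance == 0.0f ? epsilon : variance);
    for (uint32_t i = 0; i < vSize; ++i)
    {
        // Normalize, then scale by the learned layer-norm weight.
        output[i] = (input[i] - mean) * stddevInv * lnWeights[i];
    }
}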
Diffstat (limited to 'src/backends/reference/workloads/LstmUtils.cpp')
-rw-r--r--    src/backends/reference/workloads/LstmUtils.cpp    307
1 file changed, 307 insertions, 0 deletions
diff --git a/src/backends/reference/workloads/LstmUtils.cpp b/src/backends/reference/workloads/LstmUtils.cpp
new file mode 100644
index 0000000000..f197aae291
--- /dev/null
+++ b/src/backends/reference/workloads/LstmUtils.cpp
@@ -0,0 +1,307 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LstmUtils.hpp"
+#include "BaseIterator.hpp"
+#include <backendsCommon/CpuTensorHandle.hpp>
+
+
+// Helper functions ported from the Android code base
+// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc
+
+void VectorBatchVectorAdd(armnn::Decoder<float>& vector,
+                          uint32_t vSize,
+                          armnn::Decoder<float>& batchVector,
+                          uint32_t nBatch,
+                          armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outResult.Set(batchVector.Get() + vector.Get());
+            ++outResult;
+            ++vector;
+            ++batchVector;
+        }
+        vector -= vSize;
+    }
+    batchVector -= vSize * nBatch;
+    outResult -= vSize * nBatch;
+}
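A hypothetical usage sketch for VectorBatchVectorAdd, assuming the reference backend's MakeDecoder/MakeEncoder factories from Decoders.hpp and Encoders.hpp; shapes and values are illustrative:

#include "Decoders.hpp"
#include "Encoders.hpp"
#include <vector>

void VectorBatchVectorAddExample()
{
    std::vector<float> bias  = { 1.f, 2.f, 3.f };                    // vSize = 3
    std::vector<float> batch = { 0.f, 0.f, 0.f, 10.f, 10.f, 10.f };  // nBatch = 2
    std::vector<float> output(batch.size());

    armnn::TensorInfo vecInfo({ 3 }, armnn::DataType::Float32);
    armnn::TensorInfo batchInfo({ 2, 3 }, armnn::DataType::Float32);

    auto vec = armnn::MakeDecoder<float>(vecInfo, bias.data());
    auto bat = armnn::MakeDecoder<float>(batchInfo, batch.data());
    auto out = armnn::MakeEncoder<float>(batchInfo, output.data());

    VectorBatchVectorAdd(*vec, 3, *bat, 2, *out);
    // output is now { 1, 2, 3, 11, 12, 13 }: the bias vector was added to each batch row.
}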
+
+
+// Layer normalization for each batch.
+// normalization_epsilon guards against division by zero when the variance is zero.
+void MeanStddevNormalization(armnn::Decoder<float>& input_vector,
+                             armnn::Encoder<float>& output_vector,
+                             uint32_t v_size,
+                             uint32_t n_batch,
+                             float normalization_epsilon)
+{
+    for (uint32_t batch = 0; batch < n_batch; ++batch) {
+        float sum = 0.0f;
+        float sum_sq = 0.0f;
+        for (uint32_t i = 0; i < v_size; ++i) {
+            sum += input_vector.Get();
+            sum_sq += input_vector.Get() * input_vector.Get();
+            ++input_vector;
+        }
+        input_vector -= v_size;
+
+        const float mean = sum / static_cast<float>(v_size);
+        float stddev_inv = 0.0f;
+        const float variance = sum_sq / static_cast<float>(v_size) - mean * mean;
+        if (variance == 0) {
+            stddev_inv = 1.0f / std::sqrt(normalization_epsilon);
+        } else {
+            stddev_inv = 1.0f / std::sqrt(variance);
+        }
+
+        for (uint32_t i = 0; i < v_size; ++i) {
+            output_vector.Set((input_vector.Get() - mean) * stddev_inv);
+            ++output_vector;
+            ++input_vector;
+        }
+        // Don't reset the iterator here; it already points at the start of the next batch.
+    }
+    output_vector -= v_size * n_batch;
+    input_vector -= v_size * n_batch;
+}
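A worked example for a single batch makes the arithmetic easy to check:

// One batch, v_size = 4, input = { 1, 2, 3, 4 }:
//   sum    = 10  ->  mean     = 10 / 4 = 2.5
//   sum_sq = 30  ->  variance = 30 / 4 - 2.5 * 2.5 = 1.25
//   stddev_inv   = 1 / sqrt(1.25) ≈ 0.8944
//   output       ≈ { -1.342, -0.447, 0.447, 1.342 }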
+
+void ZeroVector(armnn::Encoder<float>& vector,
+                uint32_t vSize)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        vector.Set(0.0f);
+        ++vector;
+    }
+    vector -= vSize;
+}
+
+void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix,
+                                         uint32_t mRows,
+                                         uint32_t mCols,
+                                         armnn::Decoder<float>& vector,
+                                         uint32_t nBatch,
+                                         armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t r = 0; r < mRows; r++)
+        {
+            vector += b * mCols;
+            for (uint32_t c = 0; c < mCols; c++)
+            {
+                outResult.Set(outResult.Get() + matrix.Get() * vector.Get());
+                ++matrix;
+                ++vector;
+            }
+            outResult += 1;
+            vector -= (b+1) * mCols;
+        }
+        matrix -= (mRows * mCols);
+    }
+    outResult -= (mRows * nBatch);
+}
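The iterator offset arithmetic above is equivalent to the following plain-array formulation (illustrative only): the matrix is row-major [mRows, mCols], the batched vector is [nBatch, mCols], and the products are accumulated into a [nBatch, mRows] result:

#include <cstdint>

void MatrixBatchVectorMultiplyAccumulateRef(const float* matrix, uint32_t mRows, uint32_t mCols,
                                            const float* vector, uint32_t nBatch, float* outResult)
{
    for (uint32_t b = 0; b < nBatch; ++b)
    {
        for (uint32_t r = 0; r < mRows; ++r)
        {
            for (uint32_t c = 0; c < mCols; ++c)
            {
                outResult[b * mRows + r] += matrix[r * mCols + c] * vector[b * mCols + c];
            }
        }
    }
}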
+
+void VectorBatchVectorAssign(armnn::Decoder<float>& vector,
+                             uint32_t vSize,
+                             uint32_t nBatch,
+                             armnn::Encoder<float>& outBatchVector)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outBatchVector.Set(vector.Get());
+            ++outBatchVector;
+            ++vector;
+        }
+        vector -= vSize;
+    }
+    outBatchVector -= (nBatch * vSize);
+}
+
+void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector,
+                                             uint32_t vSize,
+                                             armnn::Decoder<float>& batchVector,
+                                             uint32_t nBatch,
+                                             armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outResult.Set(outResult.Get() + vector.Get() * batchVector.Get());
+            ++outResult;
+            ++vector;
+            ++batchVector;
+        }
+        vector -= vSize;
+    }
+    batchVector -= vSize * nBatch;
+    outResult -= vSize * nBatch;
+}
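In LSTM implementations this accumulate form is typically what folds, for example, peephole contributions into a gate buffer. A small worked example (illustrative values):

// vSize = 2, nBatch = 2:
//   vector             = { 10, 100 }
//   batchVector        = { 1, 2, 3, 4 }      // two batch rows: { 1, 2 } and { 3, 4 }
//   outResult (before) = { 0, 0, 0, 0 }
//   outResult (after)  = { 10, 200, 30, 400 }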
+
+void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector,
+                                   uint32_t vSize,
+                                   armnn::Decoder<float>& batchVector,
+                                   uint32_t nBatch,
+                                   armnn::Encoder<float>& outResult)
+{
+    for (uint32_t b = 0; b < nBatch; b++)
+    {
+        for (uint32_t v = 0; v < vSize; v++)
+        {
+            outResult.Set(vector.Get() * batchVector.Get());
+            ++outResult;
+            ++vector;
+            ++batchVector;
+        }
+        vector -= vSize;
+    }
+    batchVector -= vSize * nBatch;
+    outResult -= vSize * nBatch;
+}
+
+void Sub1Vector(armnn::Decoder<float>& vector,
+                uint32_t vSize,
+                armnn::Encoder<float>& result)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        result.Set(1.0f - vector.Get());
+        ++vector;
+        ++result;
+    }
+    vector -= vSize;
+    result -= vSize;
+}
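Sub1Vector computes 1 - x element-wise; in LSTM cells with coupled input and forget gates (CIFG), this is presumably how the input gate is derived from the forget gate, i.e. inputGate = 1 - forgetGate:

// vSize = 3: vector = { 0.2, 0.5, 0.9 }  ->  result = { 0.8, 0.5, 0.1 }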
+
+void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1,
+                              armnn::Decoder<float>& vector2,
+                              uint32_t vSize,
+                              armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(vector1.Get() * vector2.Get());
+        ++outResult;
+        ++vector1;
+        ++vector2;
+    }
+    outResult -= vSize;
+    vector1 -= vSize;
+    vector2 -= vSize;
+}
+
+void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1,
+                                        armnn::Decoder<float>& vector2,
+                                        uint32_t vSize,
+                                        armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(outResult.Get() + vector1.Get() * vector2.Get());
+        ++outResult;
+        ++vector1;
+        ++vector2;
+    }
+    outResult -= vSize;
+    vector1 -= vSize;
+    vector2 -= vSize;
+}
+
+float Clip(float f,
+           float absLimit)
+{
+    float result = (absLimit < f) ? absLimit : f;
+    result = (-absLimit > result) ? -absLimit : result;
+    return result;
+}
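This is the usual symmetric clamp; with <algorithm> it could be written equivalently as a one-liner (illustrative alternative, not a suggested change to the patch):

#include <algorithm>

float ClipAlt(float f, float absLimit)
{
    // Clamp f to the range [-absLimit, absLimit].
    return std::max(-absLimit, std::min(f, absLimit));
}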
+
+void ClipVector(armnn::Decoder<float>& vector,
+                uint32_t vSize,
+                float absLimit,
+                armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(Clip(vector.Get(), absLimit));
+        ++vector;
+        ++outResult;
+    }
+    vector -= vSize;
+    outResult -= vSize;
+}
+
+void CopyVector(armnn::Decoder<float>& vector,
+                uint32_t vSize,
+                armnn::Encoder<float>& outResult)
+{
+    for (uint32_t v = 0; v < vSize; v++)
+    {
+        outResult.Set(vector.Get());
+        ++outResult;
+        ++vector;
+    }
+    outResult -= vSize;
+    vector -= vSize;
+}
+
+void SetActivationParameters(uint32_t activation,
+                             armnn::ActivationFunction& outArmnnActivation,
+                             float& outA,
+                             float& outB)
+{
+    switch (activation)
+    {
+    case 0: // None: no activation is applied, so outArmnnActivation is deliberately left unchanged
+        outA = 0;
+        outB = 0;
+        return;
+
+    case 1: // Relu
+        outArmnnActivation = armnn::ActivationFunction::ReLu;
+        outA = 0;
+        outB = 0;
+        return;
+
+    case 3: // Relu6
+        outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
+        outA = 6;
+        outB = 0;
+        return;
+
+    case 4: // Tanh
+        outArmnnActivation = armnn::ActivationFunction::TanH;
+        outA = 1;
+        outB = 1;
+        return;
+
+    case 6: // Sigmoid
+        outArmnnActivation = armnn::ActivationFunction::Sigmoid;
+        outA = 0;
+        outB = 0;
+        return;
+
+    default:
+        throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
+    }
+}
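The case values appear to follow the TfLite fused-activation enumeration (0 = None, 1 = Relu, 3 = Relu6, 4 = Tanh, 6 = Sigmoid), which would explain why 2 and 5 are skipped. A hypothetical call:

armnn::ActivationFunction activation = armnn::ActivationFunction::Sigmoid;
float a = 0.0f;
float b = 0.0f;
SetActivationParameters(4, activation, a, b);
// activation == armnn::ActivationFunction::TanH, a == 1, b == 1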
+
+std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
+{
+    if (!ptr)
+    {
+        return nullptr;
+    }
+
+    return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
+}
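Typical use is in a workload constructor: each optional weight tensor from the queue descriptor is deep-copied into a handle the workload owns, and a null input (e.g. a layer-norm weight tensor when normalization is disabled) simply propagates as a null handle. A sketch, where the member and descriptor field names are illustrative:

// In a workload constructor (illustrative):
m_CellLayerNormWeightsTensor = AssignScopedCpuTensorHandle(descriptor.m_CellLayerNormWeights);
// Stays null when the network does not use layer normalization.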