diff options
author | Jan Eilers <jan.eilers@arm.com> | 2019-06-26 13:10:09 +0100 |
---|---|---|
committer | Jan Eilers <jan.eilers@arm.com> | 2019-07-02 09:59:37 +0000 |
commit | 38e05bd2836b1b65b440330a9c283038ba4192c3 (patch) | |
tree | c232f71ce6a101c70ed65e046678f7b22593dbe4 /src/backends/reference/workloads/LstmUtils.cpp | |
parent | d0c0cc3e27f1ada9df167d3b9ff248be432d16e1 (diff) | |
download | armnn-38e05bd2836b1b65b440330a9c283038ba4192c3.tar.gz |
IVGCVSW-3236 Extend Ref LSTM with layer normalization support
* Add descriptor values
* Update lstm queue descriptor validate function
* Update lstm workload
* Update isLstmSupported (Cl and Ref), LayerSupportBase, ILayerSupport
* Update lstm layer
* Add unit tests
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: I932175d550facfb342325051eaa7bd2084ebdc18
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Diffstat (limited to 'src/backends/reference/workloads/LstmUtils.cpp')
-rw-r--r-- | src/backends/reference/workloads/LstmUtils.cpp | 307 |
1 file changed, 307 insertions, 0 deletions
diff --git a/src/backends/reference/workloads/LstmUtils.cpp b/src/backends/reference/workloads/LstmUtils.cpp new file mode 100644 index 0000000000..f197aae291 --- /dev/null +++ b/src/backends/reference/workloads/LstmUtils.cpp @@ -0,0 +1,307 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +//#pragma once + +#include "LstmUtils.hpp" +#include "BaseIterator.hpp" +#include <backendsCommon/CpuTensorHandle.hpp> + + +// Helper functions ported from the Android code base +// Refer to: android/external/tensorflow/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc + +void VectorBatchVectorAdd(armnn::Decoder<float>& vector, + uint32_t vSize, + armnn::Decoder<float>& batchVector, + uint32_t nBatch, + armnn::Encoder<float>& outResult ) +{ + for (uint32_t b = 0; b < nBatch; b++) + { + for (uint32_t v = 0; v < vSize; v++) + { + outResult.Set(batchVector.Get() + vector.Get()); + ++outResult; + ++vector; + ++batchVector; + } + vector -= vSize; + } + batchVector -= vSize * nBatch; + outResult -= vSize * nBatch; +} + + +// Layer norm for each batch. +// normalization_epsilon is added to avoid divergence. 
// Mean/stddev normalization (layer norm) applied independently to each of the
// n_batch batches of v_size elements: out = (in - mean) / stddev.
// normalization_epsilon guards against a zero variance; both iterators are
// rewound before returning.
void MeanStddevNormalization(armnn::Decoder<float>& input_vector,
                             armnn::Encoder<float>& output_vector,
                             uint32_t v_size,
                             uint32_t n_batch,
                             float normalization_epsilon)
{
    for (uint32_t batch = 0; batch < n_batch; ++batch) {
        // First pass over the batch: accumulate sum and sum of squares.
        float sum = 0.0f;
        float sum_sq = 0.0f;
        for (uint32_t i = 0; i < v_size; ++i) {
            sum += input_vector.Get();
            sum_sq += input_vector.Get() * input_vector.Get();
            ++input_vector;
        }
        // Rewind so the second pass re-reads the same batch.
        input_vector -= v_size;

        const float mean = sum / static_cast<float>(v_size);
        float stddev_inv = 0.0f;
        const float variance = sum_sq / static_cast<float>(v_size) - mean * mean;
        // NOTE(review): exact float == 0 compare mirrors the Android reference
        // kernel this was ported from; epsilon is only substituted when the
        // variance is exactly zero, it is not added to the variance.
        if (variance == 0) {
            stddev_inv = 1.0f / std::sqrt(normalization_epsilon);
        } else {
            stddev_inv = 1.0f / std::sqrt(variance);
        }

        // Second pass: write the normalized values.
        for (uint32_t i = 0; i < v_size; ++i) {
            output_vector.Set((input_vector.Get() - mean) * stddev_inv);
            ++output_vector;
            ++input_vector;
        }
        // Don't reset iterator to handle next batch
    }
    // Rewind both iterators past all batches for the caller.
    output_vector -= v_size * n_batch;
    input_vector -= v_size * n_batch;
}

// Writes vSize zeros through 'vector' and rewinds it.
void ZeroVector(armnn::Encoder<float>& vector,
                uint32_t vSize)
{
    for (uint32_t v = 0; v < vSize; v++)
    {
        vector.Set(0.0f);
        ++vector;
    }
    vector -= vSize;
}

// Accumulates matrix (mRows x mCols) times each batch of 'vector'
// (nBatch batches of mCols elements) into 'outResult'
// (nBatch batches of mRows elements): out[b][r] += sum_c M[r][c] * v[b][c].
// 'outResult' must already hold the values to accumulate onto.
// 'matrix' and 'outResult' are rewound before returning.
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder<float>& matrix,
                                         uint32_t mRows,
                                         uint32_t mCols,
                                         armnn::Decoder<float>& vector,
                                         uint32_t nBatch,
                                         armnn::Encoder<float>& outResult)
{
    for (uint32_t b = 0; b < nBatch; b++)
    {
        for (uint32_t r = 0; r < mRows; r++)
        {
            // Seek 'vector' to the start of batch b for this row...
            vector += b * mCols;
            for (uint32_t c = 0; c < mCols; c++)
            {
                outResult.Set(outResult.Get() + matrix.Get() * vector.Get());
                ++matrix;
                ++vector;
            }
            outResult += 1;
            // ...then rewind it fully (it now sits at (b+1) * mCols).
            vector -= (b+1) * mCols;
        }
        // Re-read the whole matrix for the next batch.
        matrix -= (mRows * mCols);
    }
    outResult -= (mRows * nBatch);
}

// Copies 'vector' (vSize elements) into each of the nBatch batches of
// 'outBatchVector'. Both iterators are rewound before returning.
void VectorBatchVectorAssign(armnn::Decoder<float>& vector,
                             uint32_t vSize,
                             uint32_t nBatch,
                             armnn::Encoder<float>& outBatchVector)
{
    for (uint32_t b = 0; b < nBatch; b++)
    {
        for (uint32_t v = 0; v < vSize; v++)
        {
            outBatchVector.Set(vector.Get());
            ++outBatchVector;
            ++vector;
        }
        vector -= vSize;
    }
    outBatchVector -= (nBatch * vSize);
}

// Accumulates the element-wise product of 'vector' (vSize elements) with each
// batch of 'batchVector' into 'outResult': out[b][v] += vec[v] * batch[b][v].
// All iterators are rewound before returning.
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder<float>& vector,
                                             uint32_t vSize,
                                             armnn::Decoder<float>& batchVector,
                                             uint32_t nBatch,
                                             armnn::Encoder<float>& outResult)
{
    for (uint32_t b = 0; b < nBatch; b++)
    {
        for (uint32_t v = 0; v < vSize; v++)
        {
            outResult.Set(outResult.Get() + vector.Get() * batchVector.Get());
            ++outResult;
            ++vector;
            ++batchVector;
        }
        vector -= vSize;
    }
    batchVector -= vSize * nBatch;
    outResult -= vSize * nBatch;
}

// Writes the element-wise product of 'vector' (vSize elements) with each
// batch of 'batchVector' to 'outResult': out[b][v] = vec[v] * batch[b][v].
// All iterators are rewound before returning.
void VectorBatchVectorCwiseProduct(armnn::Decoder<float>& vector,
                                   uint32_t vSize,
                                   armnn::Decoder<float>& batchVector,
                                   uint32_t nBatch,
                                   armnn::Encoder<float>& outResult)
{
    for (uint32_t b = 0; b < nBatch; b++)
    {
        for (uint32_t v = 0; v < vSize; v++)
        {
            outResult.Set(vector.Get() * batchVector.Get());
            ++outResult;
            ++vector;
            ++batchVector;
        }
        vector -= vSize;
    }
    batchVector -= vSize * nBatch;
    outResult -= vSize * nBatch;
}

// Computes result[i] = 1 - vector[i] for vSize elements (used for the
// (1 - forget_gate) style terms). Iterators are rewound before returning.
void Sub1Vector(armnn::Decoder<float>& vector,
                uint32_t vSize,
                armnn::Encoder<float>& result)
{
    for (uint32_t v = 0; v < vSize; v++)
    {
        result.Set(1.0f - vector.Get());
        ++vector;
        ++result;
    }
    vector -= vSize;
    result -= vSize;
}

// Element-wise product of two vSize vectors: out[i] = v1[i] * v2[i].
// All iterators are rewound before returning.
void VectorVectorCwiseProduct(armnn::Decoder<float>& vector1,
                              armnn::Decoder<float>& vector2,
                              uint32_t vSize,
                              armnn::Encoder<float>& outResult)
{
    for (uint32_t v = 0; v < vSize; v++)
    {
        outResult.Set(vector1.Get() * vector2.Get());
        ++outResult;
        ++vector1;
        ++vector2;
    }
    outResult -= vSize;
    vector1 -= vSize;
    vector2 -= vSize;
}

// Accumulates the element-wise product of two vSize vectors into 'outResult':
// out[i] += v1[i] * v2[i]. All iterators are rewound before returning.
void VectorVectorCwiseProductAccumulate(armnn::Decoder<float>& vector1,
                                        armnn::Decoder<float>& vector2,
                                        uint32_t vSize,
                                        armnn::Encoder<float>& outResult)
{
    for (uint32_t v = 0; v < vSize; v++)
    {
        outResult.Set(outResult.Get() + vector1.Get() * vector2.Get());
        ++outResult;
        ++vector1;
        ++vector2;
    }
    outResult -= vSize;
    vector1 -= vSize;
    vector2 -= vSize;
}

// Clamps f to the symmetric range [-absLimit, absLimit].
float Clip(float f,
           float absLimit)
{
    float result = (absLimit < f) ? absLimit : f;
    result = (-absLimit > result) ? -absLimit : result;
    return result;
}

// Applies Clip() element-wise over vSize elements.
// Both iterators are rewound before returning.
void ClipVector(armnn::Decoder<float>& vector,
                uint32_t vSize,
                float absLimit,
                armnn::Encoder<float>& outResult)
{
    for (uint32_t v = 0; v < vSize; v++)
    {
        outResult.Set(Clip(vector.Get(), absLimit));
        ++vector;
        ++outResult;
    }
    vector -= vSize;
    outResult -= vSize;
}

// Copies vSize elements from 'vector' to 'outResult'.
// Both iterators are rewound before returning.
void CopyVector(armnn::Decoder<float>& vector,
                uint32_t vSize,
                armnn::Encoder<float>& outResult)
{
    for (uint32_t v = 0; v < vSize; v++)
    {
        outResult.Set(vector.Get());
        ++outResult;
        ++vector;
    }
    outResult -= vSize;
    vector -= vSize;
}

// Maps a TfLite-style activation enum value to an armnn::ActivationFunction
// plus its (outA, outB) parameters. Throws armnn::Exception for unsupported
// values.
// NOTE(review): for activation == 0 (None) outArmnnActivation is deliberately
// left untouched — presumably callers skip the activation entirely in that
// case; confirm against the LSTM workload before relying on it.
void SetActivationParameters(uint32_t activation,
                             armnn::ActivationFunction& outArmnnActivation,
                             float& outA,
                             float& outB)
{
    switch (activation)
    {
        case 0: // None
            outA = 0;
            outB = 0;
            return;

        case 1: // Relu
            outArmnnActivation = armnn::ActivationFunction::ReLu;
            outA = 0;
            outB = 0;
            return;

        case 3: // Relu6
            outArmnnActivation = armnn::ActivationFunction::BoundedReLu;
            outA = 6;
            outB = 0;
            return;

        case 4: // Tanh
            outArmnnActivation = armnn::ActivationFunction::TanH;
            outA = 1;
            outB = 1;
            return;

        case 6: // Sigmoid
            outArmnnActivation = armnn::ActivationFunction::Sigmoid;
            outA = 0;
            outB = 0;
            return;

        default:
            // Unhandled enum values (e.g. 2, 5) are rejected loudly.
            throw armnn::Exception("Unsupported activation function: " + std::to_string(activation));
    }
}

// Deep-copies a ConstCpuTensorHandle into an owned ScopedCpuTensorHandle;
// returns nullptr when given nullptr (optional tensors).
std::unique_ptr<armnn::ScopedCpuTensorHandle> AssignScopedCpuTensorHandle(const armnn::ConstCpuTensorHandle* ptr)
{
    if (!ptr)
    {
        return nullptr;
    }

    return std::make_unique<armnn::ScopedCpuTensorHandle>(*ptr);
}