Diffstat (limited to 'src/armnn')
321 files changed, 33271 insertions, 0 deletions
diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp new file mode 100644 index 0000000000..0b11b44260 --- /dev/null +++ b/src/armnn/Descriptors.cpp @@ -0,0 +1,279 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "armnn/Descriptors.hpp" + +#include <algorithm> +#include <array> +#include <vector> + +#include <boost/format.hpp> +#include <boost/log/trivial.hpp> +#include <boost/numeric/conversion/cast.hpp> + +namespace armnn +{ + +PermutationVector::PermutationVector(const ValueType *dimMappings, const SizeType numDimMappings) +{ + // Validation + + if (numDimMappings > MaxNumOfTensorDimensions) + { + boost::format fmt("The number of mappings (%1%) cannot be greater " + "than the maximum number of dimensions supported (%2%)"); + throw InvalidArgumentException(boost::str(fmt % numDimMappings % MaxNumOfTensorDimensions)); + } + + if ((dimMappings == nullptr) && (numDimMappings != 0)) + { + throw InvalidArgumentException("Dimension mappings must not be NULL if the number of mappings is positive"); + } + + for (SizeType i = 0; i < numDimMappings; ++i) + { + const ValueType dstIndex = dimMappings[i]; + if (dstIndex >= numDimMappings) + { + boost::format fmt("Dimension mapping at index %1% is invalid: %2% is outside of the valid range [0,%3%]"); + throw InvalidArgumentException(boost::str(fmt % i % dstIndex % (numDimMappings - 1))); + } + } + + // Validation: Detect duplicates + { + std::array<bool, MaxNumOfTensorDimensions> observedDims; + observedDims.fill(false); + + for (SizeType i = 0; i < numDimMappings; ++i) + { + const ValueType dstIndex = dimMappings[i]; + if (observedDims[dstIndex]) + { + throw InvalidArgumentException("Invalid dimension mappings: Two or more source dimensions are mapped " + "to the same output dimension"); + } + observedDims[dstIndex] = true; + } + } + + // Initialize + for (SizeType i = 0; i < numDimMappings; ++i) + { + m_DimMappings[i] = dimMappings[i]; + } + m_NumDimMappings = numDimMappings; +} + +PermutationVector::PermutationVector(std::initializer_list<ValueType> dimMappings) + : PermutationVector(dimMappings.begin(), boost::numeric_cast<SizeType>(dimMappings.size())) +{ +} + +OriginsDescriptor::OriginsDescriptor() +: m_NumViews(0) +, m_NumDimensions(0) +, m_ViewOrigins(nullptr) +{} + +OriginsDescriptor::OriginsDescriptor(uint32_t numViews, uint32_t numDimensions /*= 4*/) +: m_NumViews(numViews) +, m_NumDimensions(numDimensions) +, m_ViewOrigins(numViews && numDimensions > 0 ? new uint32_t *[numViews]() : nullptr) +{ + for (uint32_t i = 0; m_NumDimensions > 0 && i < m_NumViews; ++i) + { + m_ViewOrigins[i] = new uint32_t[m_NumDimensions](); + } +} + +OriginsDescriptor::OriginsDescriptor(const OriginsDescriptor& other) +: m_NumViews(other.m_NumViews) +, m_NumDimensions(other.m_NumDimensions) +, m_ViewOrigins(other.m_NumViews && other.m_NumDimensions > 0 ? 
new uint32_t *[other.m_NumViews]() : nullptr) +{ + for (uint32_t i = 0; m_NumDimensions > 0 && i < m_NumViews; ++i) + { + m_ViewOrigins[i] = new uint32_t[m_NumDimensions](); + memcpy(m_ViewOrigins[i], other.m_ViewOrigins[i], m_NumDimensions * sizeof(uint32_t)); + } +} + +OriginsDescriptor::OriginsDescriptor(OriginsDescriptor&& other) +: OriginsDescriptor() +{ + swap(*this, other); +} + +OriginsDescriptor::~OriginsDescriptor() +{ + for (uint32_t i = 0; m_NumDimensions > 0 && i < m_NumViews; ++i) + { + delete[] m_ViewOrigins[i]; + } + delete[] m_ViewOrigins; +} + +OriginsDescriptor& OriginsDescriptor::operator=(OriginsDescriptor rhs) +{ + swap(*this, rhs); + return *this; +} + +Status OriginsDescriptor::SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value) +{ + if (view >= m_NumViews) + { + BOOST_LOG_TRIVIAL(error) << "OriginsDescriptor::SetViewOriginCoord: view argument:" << view << + " is out of range"; + return Status::Failure; + } + if (coord >= m_NumDimensions) + { + BOOST_LOG_TRIVIAL(error) << "OriginsDescriptor::SetViewOriginCoord: coord argument:" << coord << + " is out of range"; + return Status::Failure; + } + + m_ViewOrigins[view][coord] = value; + return Status::Success; +} + + +uint32_t OriginsDescriptor::GetNumViews() const +{ + return m_NumViews; +} + +uint32_t OriginsDescriptor::GetNumDimensions() const +{ + return m_NumDimensions; +} + +const uint32_t* OriginsDescriptor::GetViewOrigin(uint32_t idx) const +{ + return m_ViewOrigins ? m_ViewOrigins[idx] : nullptr; +} + + +// Reorder the viewOrigins in accordance with the indices presented in newOrdering array +void OriginsDescriptor::ReorderOrigins(unsigned int* newOrdering, unsigned int numNewOrdering) +{ + BOOST_ASSERT_MSG(m_NumViews == numNewOrdering, "number of views must match number of " + "elements in the new ordering array"); + std::vector<uint32_t*> viewOrigins(&m_ViewOrigins[0], &m_ViewOrigins[m_NumViews]); + + for (unsigned int i = 0; i < numNewOrdering; ++i) + { + m_ViewOrigins[i] = viewOrigins[newOrdering[i]]; + } +} + +ViewsDescriptor::ViewsDescriptor() +: m_Origins() +, m_ViewSizes(nullptr) +{} + +ViewsDescriptor::ViewsDescriptor(uint32_t numViews, uint32_t numDimensions /*= 4*/) + : m_Origins(numViews, numDimensions) + , m_ViewSizes(numViews && numDimensions > 0 ? new uint32_t *[numViews]() : nullptr) +{ + for (uint32_t i = 0; GetNumDimensions() > 0 && i < GetNumViews(); ++i) + { + m_ViewSizes[i] = new uint32_t[GetNumDimensions()](); + } +} + +ViewsDescriptor::ViewsDescriptor(const ViewsDescriptor& other) + : m_Origins(other.m_Origins) + , m_ViewSizes(other.GetNumViews() && other.GetNumDimensions() > 0 ? 
new uint32_t *[other.GetNumViews()]() : nullptr) +{ + for (uint32_t i = 0; GetNumDimensions() > 0 && i < GetNumViews(); ++i) + { + m_ViewSizes[i] = new uint32_t[GetNumDimensions()](); + memcpy(m_ViewSizes[i], other.m_ViewSizes[i], GetNumDimensions() * sizeof(uint32_t)); + } +} + +ViewsDescriptor::ViewsDescriptor(ViewsDescriptor&& other) + : ViewsDescriptor() +{ + swap(*this, other); +} + +ViewsDescriptor::~ViewsDescriptor() +{ + for (uint32_t i = 0; GetNumDimensions() > 0 && i < GetNumViews(); ++i) + { + delete[] m_ViewSizes[i]; + } + delete[] m_ViewSizes; +} + +ViewsDescriptor& ViewsDescriptor::operator=(ViewsDescriptor rhs) +{ + swap(*this, rhs); + return *this; +} + +uint32_t ViewsDescriptor::GetNumViews() const +{ + return m_Origins.GetNumViews(); +} + +uint32_t ViewsDescriptor::GetNumDimensions() const +{ + return m_Origins.GetNumDimensions(); +} + +const uint32_t* ViewsDescriptor::GetViewOrigin(uint32_t idx) const +{ + return m_Origins.GetViewOrigin(idx); +} + +Status ViewsDescriptor::SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value) +{ + return m_Origins.SetViewOriginCoord(view, coord, value); +} + +Status ViewsDescriptor::SetViewSize(uint32_t view, uint32_t coord, uint32_t value) +{ + if (view >= GetNumViews()) + { + BOOST_LOG_TRIVIAL(error) << "ViewsDescriptor::SetViewSize: view argument:" << view << + " is out of range"; + return Status::Failure; + } + if (coord >= GetNumDimensions()) + { + BOOST_LOG_TRIVIAL(error) << "ViewsDescriptor::SetViewSize: coord argument:" << coord << + " is out of range"; + return Status::Failure; + } + + m_ViewSizes[view][coord] = value; + return Status::Success; +} + +const uint32_t* ViewsDescriptor::GetViewSizes(uint32_t idx) const +{ + return m_ViewSizes ? m_ViewSizes[idx] : nullptr; +} + +void swap(OriginsDescriptor& first, OriginsDescriptor& second) +{ + using std::swap; + swap(first.m_NumViews, second.m_NumViews); + swap(first.m_NumDimensions, second.m_NumDimensions); + swap(first.m_ViewOrigins, second.m_ViewOrigins); +} + +void swap(ViewsDescriptor& first, ViewsDescriptor& second) +{ + using std::swap; + swap(first.m_Origins, second.m_Origins); + swap(first.m_ViewSizes, second.m_ViewSizes); +} + +} diff --git a/src/armnn/Exceptions.cpp b/src/armnn/Exceptions.cpp new file mode 100644 index 0000000000..2cf95fa4d1 --- /dev/null +++ b/src/armnn/Exceptions.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "armnn/Exceptions.hpp" + +#include <string> + +namespace armnn +{ + +Exception::Exception(const std::string& message) +: m_Message(message) +{ +} + +const char* Exception::what() const noexcept +{ + return m_Message.c_str(); +} + +UnimplementedException::UnimplementedException() +: Exception("Function not yet implemented") +{ +} + +} diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp new file mode 100644 index 0000000000..97f702e50f --- /dev/null +++ b/src/armnn/Graph.cpp @@ -0,0 +1,169 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "Graph.hpp" +#include "Layers.hpp" + +#include <armnn/Utils.hpp> +#include <armnn/TypesUtils.hpp> + +#include <boost/polymorphic_cast.hpp> +#include <boost/log/trivial.hpp> +#include <boost/assert.hpp> +#include <boost/format.hpp> + +#include <unordered_map> + +namespace armnn +{ + +Graph::Graph(const Graph& other) +: m_LayersInOrder(other.m_LayersInOrder) +{ + std::unordered_map<const Layer*, Layer*> otherToClonedMap; + + for (auto&& otherLayer : other.m_Layers) + { + Layer* const layer = otherLayer->Clone(*this); + otherToClonedMap.emplace(otherLayer, layer); + } + + // Copy slot connections + for (auto&& otherLayer : other.m_Layers) + { + Layer* const thisLayer = otherToClonedMap[otherLayer]; + + auto outputSlot = thisLayer->BeginOutputSlots(); + for (auto&& otherOutputSlot : otherLayer->GetOutputSlots()) + { + for (auto&& otherInputSlot : otherOutputSlot.GetConnections()) + { + const Layer& otherTgtLayer = otherInputSlot->GetOwningLayer(); + Layer* const thisTgtLayer = otherToClonedMap[&otherTgtLayer]; + + InputSlot& inputSlot = thisTgtLayer->GetInputSlot(otherInputSlot->GetSlotIndex()); + outputSlot->Connect(inputSlot); + } + outputSlot->SetTensorInfo(otherOutputSlot.GetTensorInfo()); + ++outputSlot; + } + } +} + +Status Graph::Print() const +{ + if (m_Layers.empty()) + { + BOOST_LOG_TRIVIAL(info) << "\n Graph is empty.\n"; + return Status::Success; + } + BOOST_LOG_TRIVIAL(info) << "\n"; + BOOST_LOG_TRIVIAL(info) << "Walking Pattern: \n"; + + for (auto&& it : TopologicalSort()) + { + BOOST_LOG_TRIVIAL(info) << it->GetName() << ":" << GetLayerTypeAsCString(it->GetType()) + << ":" << GetComputeDeviceAsCString(it->GetComputeDevice()); + } + BOOST_LOG_TRIVIAL(info) << "\n\n"; + + return Status::Success; +} + +Status Graph::AllocateDynamicBuffers() +{ + for (auto&& layer : m_Layers) + { + for (auto slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot) + { + slot->GetOutputHandler().AllocateTensors(); + } + } + return Status::Success; +} + +const Graph& Graph::TopologicalSort() const +{ + if (!m_LayersInOrder) + { + //Reset layer order + for (auto&& it : m_Layers) + { + it->ResetPriority(); + } + + auto compareLayerPriority = [](const LayersList::value_type& layerA, const LayersList::value_type& layerB) + { + return layerA->GetPriority() < layerB->GetPriority(); + }; + + m_Layers.sort(compareLayerPriority); + + m_LayersInOrder = true; + } + + return *this; +} + +void Graph::AddCopyLayers() +{ + // Returns true if the given layer could potentially need an intermediate copy layer (depending on its + // connections to other layers). 
At the time of writing, copy layers will be inserted in the following situations: + // CPU -> CL (and viceversa) + // CPU -> Neon (and viceversa) + auto MayNeedCopyLayer = [](const Layer& layer) + { + // All layers should have been associated with a valid compute device at this point + BOOST_ASSERT(layer.GetComputeDevice() != Compute::Undefined); + // Do not need another copy layer if copy layer is already present + return layer.GetType() != LayerType::MemCopy; + }; + + for (auto&& srcLayer : m_Layers) + { + if (MayNeedCopyLayer(*srcLayer)) + { + unsigned int srcOutputIndex = 0; + for (auto&& srcOutput : srcLayer->GetOutputSlots()) + { + for (auto&& dstInput : srcOutput.GetConnections()) + { + Layer& dstLayer = dstInput->GetOwningLayer(); + + if (MayNeedCopyLayer(dstLayer) && (dstLayer.GetComputeDevice() != srcLayer->GetComputeDevice())) + { + // A copy layer is needed in between the source and destination layers + // Record the operation rather than attempting to modify the graph as we go + // (invalidating iterators) + const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") + % srcLayer->GetName() + % srcOutputIndex + % dstLayer.GetName() + % dstInput->GetSlotIndex()); + + MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInput, copyLayerName.c_str()); + copyLayer->SetComputeDevice(dstLayer.GetComputeDevice()); + } + } + ++srcOutputIndex; + } + } + } +} + +void Graph::InferTensorInfos() +{ + for (auto&& layer : TopologicalSort()) + { + for (auto&& input : layer->GetInputSlots()) + { + boost::ignore_unused(input); + BOOST_ASSERT_MSG(input.GetConnectedOutputSlot()->IsTensorInfoSet(), + "All inputs must have the TensorInfo set at this point."); + } + layer->ValidateTensorShapesFromInputs(); + } +} + +} // namespace armnn diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp new file mode 100644 index 0000000000..8888034197 --- /dev/null +++ b/src/armnn/Graph.hpp @@ -0,0 +1,315 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include "Layers.hpp" + +#include <armnn/Types.hpp> +#include <armnn/TensorFwd.hpp> +#include <armnn/NetworkFwd.hpp> +#include <armnn/Exceptions.hpp> + +#include <list> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include <boost/assert.hpp> +#include <boost/iterator/transform_iterator.hpp> + +namespace armnn +{ +class Graph +{ +public: + template <typename CVLayerT> + static CVLayerT* PtrCast(Layer* const layer) + { + return boost::polymorphic_downcast<CVLayerT*>(layer); + } + + using LayersList = std::list<Layer*>; + using Iterator = LayersList::const_iterator; // const so pointers in the list can't be modified externally + using ConstIterator = boost::transform_iterator<decltype(&PtrCast<const Layer>), Iterator>; + using IteratorDifference = Iterator::difference_type; + + using ConstIteratorInputs = boost::transform_iterator<decltype(&PtrCast<const InputLayer>), Iterator>; + using ConstIteratorOutputs = boost::transform_iterator<decltype(&PtrCast<const OutputLayer>), Iterator>; + + /// Wrapper class returned by Graph::GetInputLayers() + struct InputLayersAccessor + { + explicit InputLayersAccessor(const Graph& graph) : m_Graph(graph) {} + + ConstIteratorInputs begin() const + { + return { m_Graph.m_Layers.begin(), &PtrCast<const InputLayer> }; + } + + ConstIteratorInputs end() const + { + return { std::next(m_Graph.m_Layers.begin(), static_cast<IteratorDifference>(m_Graph.GetNumInputs())), + &PtrCast<const InputLayer> }; + } + + const Graph& m_Graph; + }; + + /// Wrapper class returned by Graph::GetOutputLayers() + struct OutputLayersAccessor + { + explicit OutputLayersAccessor(const Graph& graph) : m_Graph(graph) {} + + ConstIteratorOutputs begin() const + { + return { std::prev(m_Graph.m_Layers.end(), static_cast<IteratorDifference>(m_Graph.GetNumOutputs())), + &PtrCast<const OutputLayer> }; + } + + ConstIteratorOutputs end() const + { + return { m_Graph.m_Layers.end(), &PtrCast<const OutputLayer> }; + } + + const Graph& m_Graph; + }; + + Graph() : m_LayersInOrder(true) {} + + Graph(const Graph& other); + + Graph& operator=(const Graph& other) = delete; + + ~Graph() + { + for (auto&& layer : m_Layers) + { + delete layer; + } + } + + Status Print() const; + + /// Adds a new layer of type LaterType to the graph constructed with the arguments passed. + template <typename LayerT, typename... Args> + LayerT* AddLayer(Args&&... args); + + /// Inserts a new layer between the output slot currently connected to insertBefore + /// and insertBefore itself. + template <typename LayerT, typename... Args> + LayerT* InsertNewLayer(InputSlot& insertBefore, Args&&... args); + + /// Deletes the layer at the specified position and returns an iterator pointing + /// to the next element after the one being deleted. + Iterator EraseLayer(Iterator pos); + + /// Deletes the layer and returns an iterator pointing to the next layer in the graph + /// (next in the list, after the one being deleted). Sets @a layer to nullptr on return. + /// Templated to support pointers to any layer type. + template <typename LayerT> + Iterator EraseLayer(LayerT*& layer); + + /// Return iterator pointing to begin of list. Lowercase for range-based for loops. + Iterator begin() { return m_Layers.begin(); } + /// Return iterator pointing to end of list. Lowercase for range-based for loops. + Iterator end() { return m_Layers.end(); } + + /// Return const iterator pointing to begin of list. Lowercase for range-based for loops. 
+ ConstIterator begin() const { return {m_Layers.begin(), &PtrCast<const Layer>}; } + /// Return const iterator pointing to end of list. Lowercase for range-based for loops. + ConstIterator end() const { return {m_Layers.end(), &PtrCast<const Layer>}; } + + /// Sort layers in topological order and return this. + Graph& TopologicalSort() { const_cast<const Graph*>(this)->TopologicalSort(); return *this; } + const Graph& TopologicalSort() const; + + size_t GetNumInputs() const { return m_InputIds.size(); } + size_t GetNumOutputs() const { return m_OutputIds.size(); } + + /// Returns a wrapper object with begin(), end() methods to iterate over the input layers + /// in a range-based for loop + InputLayersAccessor GetInputLayers() const { return InputLayersAccessor(*this); } + + /// Returns a wrapper object with begin(), end() methods to iterate over the output layers + /// in a range-based for loop + OutputLayersAccessor GetOutputLayers() const { return OutputLayersAccessor(*this); } + + size_t GetNumLayers() const { return m_Layers.size(); } + + /// Allocate memory for all tensors under output tensor handers of each layer + Status AllocateDynamicBuffers(); + + /// Modifies the graph in-place, removing edges connecting layers using different compute devices, + /// and relinking them via an intermediary copy layers. + void AddCopyLayers(); + + void InferTensorInfos(); + +private: + template <typename LayerT> + class LayerInGraphBase; + + template <typename LayerT> + class LayerInGraph; + + /// Get the position of a layer in the graph. + Iterator GetPosInGraph(Layer& layer); + + /// Adds a new layer of type LaterType to the graph constructed with the arguments passed. + template <typename LayerT, typename... Args> + LayerInGraph<LayerT>* AddLayerImpl(Iterator insertBefore, Args&&... args); + + std::unordered_set<LayerBindingId> m_InputIds; + std::unordered_set<LayerBindingId> m_OutputIds; + std::unordered_map<const Layer*, Iterator> m_PosInGraphMap; + + /// Mutable to allow sorting on const object. + mutable LayersList m_Layers; + mutable bool m_LayersInOrder; +}; + +/// Common base class for layers in the graph +template <typename LayerT> +class Graph::LayerInGraphBase : public LayerT +{ +protected: + template <typename... Args> + LayerInGraphBase(Graph& graph, Iterator insertBefore, Args&&... args) + : LayerT(std::forward<Args>(args)...), m_Graph(graph) + { + m_Graph.m_PosInGraphMap.emplace(this, m_Graph.m_Layers.emplace(insertBefore, this)); + } + ~LayerInGraphBase() + { + const size_t numErased = m_Graph.m_PosInGraphMap.erase(this); + boost::ignore_unused(numErased); + BOOST_ASSERT(numErased == 1); + } + + Graph& m_Graph; +}; + +/// Input/Output layers specialize this template +template <typename LayerT> +class Graph::LayerInGraph final : public LayerInGraphBase<LayerT> +{ +public: + template <typename... Args> + LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args) + : LayerInGraphBase<LayerT>(graph, insertBefore, std::forward<Args>(args)...) + { + } +}; + +/// Inputs add/remove their binding id to m_InputIds in the graph. +template <> +class Graph::LayerInGraph<InputLayer> final : public LayerInGraphBase<InputLayer> +{ +public: + template <typename... Args> + LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args) + : LayerInGraphBase<InputLayer>(graph, insertBefore, std::forward<Args>(args)...) 
+ { + const bool isNewId = m_Graph.m_InputIds.emplace(GetBindingId()).second; + if (!isNewId) + { + throw InvalidArgumentException("A layer already exists with the specified id"); + } + } + ~LayerInGraph() override + { + const size_t numErased = m_Graph.m_InputIds.erase(GetBindingId()); + boost::ignore_unused(numErased); + BOOST_ASSERT(numErased == 1); + } +}; + +/// Outputs add/remove their binding id to m_OutputIds in the graph. +template <> +class Graph::LayerInGraph<OutputLayer> final : public LayerInGraphBase<OutputLayer> +{ +public: + template <typename... Args> + LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args) + : LayerInGraphBase<OutputLayer>(graph, insertBefore, std::forward<Args>(args)...) + { + const bool isNewId = m_Graph.m_OutputIds.emplace(GetBindingId()).second; + if (!isNewId) + { + throw InvalidArgumentException("A layer already exists with the specified id"); + } + } + ~LayerInGraph() override + { + const size_t numErased = m_Graph.m_OutputIds.erase(GetBindingId()); + boost::ignore_unused(numErased); + BOOST_ASSERT(numErased == 1); + } +}; + +inline Graph::Iterator Graph::GetPosInGraph(Layer& layer) +{ + auto it = m_PosInGraphMap.find(&layer); + BOOST_ASSERT(it != m_PosInGraphMap.end()); + return it->second; +} + +template <typename LayerT, typename... Args> +inline Graph::LayerInGraph<LayerT>* Graph::AddLayerImpl(Iterator insertBefore, Args&&... args) +{ + return new LayerInGraph<LayerT>(*this, insertBefore, std::forward<Args>(args)...); +} + +/// Inputs are inserted at the front of the list, to keep the order correct if the list is sorted. +/// Outputs are inserted at the back of the list, to keep the order correct if the list is sorted. +/// Other layers are inserted before existing outputs, so the latter remain at the back of the list. +template <typename LayerT, typename... Args> +inline LayerT* Graph::AddLayer(Args&&... args) +{ + switch (LayerEnumOf<LayerT>()) + { + case LayerType::Input: + { + return AddLayerImpl<LayerT>(begin(), std::forward<Args>(args)...); + } + case LayerType::Output: + { + return AddLayerImpl<LayerT>(end(), std::forward<Args>(args)...); + } + default: + { + m_LayersInOrder = false; + const auto pos = std::prev(end(), IteratorDifference(GetNumOutputs())); + return AddLayerImpl<LayerT>(pos, std::forward<Args>(args)...); + } + } +} + +template <typename LayerT, typename... Args> +inline LayerT* Graph::InsertNewLayer(InputSlot& insertBefore, Args&&... args) +{ + // Insert before the child layer so topological order is kept. + const Iterator pos = GetPosInGraph(insertBefore.GetOwningLayer()); + LayerT* const layer = AddLayerImpl<LayerT>(pos, std::forward<Args>(args)...); + insertBefore.Insert(*layer); + return layer; +} + +inline Graph::Iterator Graph::EraseLayer(Iterator pos) +{ + delete *pos; + return m_Layers.erase(pos); +} + +template <typename LayerT> +inline Graph::Iterator Graph::EraseLayer(LayerT*& layer) +{ + BOOST_ASSERT(layer != nullptr); + Iterator next = EraseLayer(GetPosInGraph(*layer)); + layer = nullptr; + return next; +} + +} // namespace armnn diff --git a/src/armnn/InternalTypes.cpp b/src/armnn/InternalTypes.cpp new file mode 100644 index 0000000000..e39b15be05 --- /dev/null +++ b/src/armnn/InternalTypes.cpp @@ -0,0 +1,45 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "InternalTypes.hpp" + +#include <boost/assert.hpp> + +namespace armnn +{ + +char const* GetLayerTypeAsCString(LayerType type) +{ + switch (type) + { + case LayerType::Activation: return "Activation"; + case LayerType::Addition: return "Addition"; + case LayerType::BatchNormalization: return "BatchNormalization"; + case LayerType::Constant: return "Constant"; + case LayerType::Convolution2d: return "Convolution2d"; + case LayerType::DepthwiseConvolution2d: return "DepthwiseConvolution2d"; + case LayerType::FakeQuantization: return "FakeQuantization"; + case LayerType::Floor: return "Floor"; + case LayerType::FullyConnected: return "FullyConnected"; + case LayerType::Input: return "Input"; + case LayerType::L2Normalization: return "L2Normalization"; + case LayerType::MemCopy: return "MemCopy"; + case LayerType::Merger: return "Merger"; + case LayerType::Multiplication: return "Multiplication"; + case LayerType::Normalization: return "Normalization"; + case LayerType::Output: return "Output"; + case LayerType::Permute: return "Permute"; + case LayerType::Pooling2d: return "Pooling2d"; + case LayerType::Reshape: return "Reshape"; + case LayerType::ResizeBilinear: return "ResizeBilinear"; + case LayerType::Softmax: return "Softmax"; + case LayerType::Splitter: return "Splitter"; + default: + BOOST_ASSERT_MSG(false, "Unknown layer type"); + return "Unknown"; + } +} + +} diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp new file mode 100644 index 0000000000..8db0da4cf2 --- /dev/null +++ b/src/armnn/InternalTypes.hpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/Types.hpp> + +#include <array> + +namespace armnn +{ + +enum class LayerType +{ + FirstLayer, + Activation = FirstLayer, + Addition, + BatchNormalization, + Constant, + Convolution2d, + DepthwiseConvolution2d, + FakeQuantization, + Floor, + FullyConnected, + Input, + L2Normalization, + MemCopy, + Merger, + Multiplication, + Normalization, + Output, + Permute, + Pooling2d, + Reshape, + ResizeBilinear, + Softmax, + // Last layer goes here + LastLayer, + Splitter = LastLayer, +}; + +const char* GetLayerTypeAsCString(LayerType type); + +using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>; +using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>; + +} diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp new file mode 100644 index 0000000000..20a8ba4926 --- /dev/null +++ b/src/armnn/Layer.cpp @@ -0,0 +1,220 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "Layer.hpp" + +#include "Graph.hpp" +#include "backends/WorkloadData.hpp" + +#include <boost/cast.hpp> +#include <boost/format.hpp> +#include <boost/log/trivial.hpp> + +#include <numeric> + +namespace armnn +{ + +void InputSlot::Insert(Layer& layer) +{ + BOOST_ASSERT(layer.GetNumInputSlots() <= 1); + BOOST_ASSERT(layer.GetNumOutputSlots() == 1); + + OutputSlot* const prevSlot = GetConnectedOutputSlot(); + + if (prevSlot != nullptr) + { + // Disconnect parent from this + prevSlot->Disconnect(*this); + + // Connect inserted layer to parent + BOOST_ASSERT(layer.GetNumInputSlots() == 1); + prevSlot->Connect(layer.GetInputSlot(0)); + + // Set tensor info for inserted layer + const TensorInfo& tensorInfo = prevSlot->GetTensorInfo(); + layer.GetOutputHandler().SetTensorInfo(tensorInfo); + } + + // Connect inserted layer to this + layer.GetOutputSlot(0).Connect(*this); +} + +const InputSlot* OutputSlot::GetConnection(unsigned int index) const +{ + ValidateConnectionIndex(index); + return m_Connections[index]; +} + +InputSlot* OutputSlot::GetConnection(unsigned int index) +{ + ValidateConnectionIndex(index); + return m_Connections[index]; +} + +void OutputSlot::SetTensorInfo(const TensorInfo& tensorInfo) +{ + GetOutputHandler().SetTensorInfo(tensorInfo); +} + +const TensorInfo& OutputSlot::GetTensorInfo() const +{ + return GetOutputHandler().GetTensorInfo(); +} + +bool OutputSlot::IsTensorInfoSet() const +{ + return GetOutputHandler().IsTensorInfoSet(); +} + +bool OutputSlot::ValidateTensorShape(const TensorShape& shape) const +{ + BOOST_ASSERT_MSG(IsTensorInfoSet(), "TensorInfo must be set in order to validate the shape."); + return shape == m_OutputHandler.GetTensorInfo().GetShape(); +} + +int OutputSlot::Connect(InputSlot& destination) +{ + destination.SetConnection(this); + m_Connections.push_back(&destination); + return boost::numeric_cast<int>(m_Connections.size() - 1); +} + +void OutputSlot::Disconnect(InputSlot& slot) +{ + slot.SetConnection(nullptr); + m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end()); +} + +void OutputSlot::DisconnectAll() +{ + while (GetNumConnections() > 0) + { + InputSlot& connection = *GetConnection(0); + Disconnect(connection); + } +} + +void OutputSlot::MoveAllConnections(OutputSlot& destination) +{ + while (GetNumConnections() > 0) + { + InputSlot& connection = *GetConnection(0); + Disconnect(connection); + destination.Connect(connection); + } +} + +void OutputSlot::ValidateConnectionIndex(unsigned int index) const +{ + if (boost::numeric_cast<std::size_t>(index) >= m_Connections.size()) + { + throw InvalidArgumentException( + boost::str(boost::format("GetConnection: Invalid index %1% provided") % index)); + } +} + +Layer::Layer(unsigned int numInputSlots, unsigned int numOutputSlots, LayerType type, const char* name) +: m_OutputHandlers(numOutputSlots) +, m_LayerName(name ? 
name : "") +, m_Type(type) +, m_ComputeDevice(Compute::Undefined) +{ + m_InputSlots.reserve(numInputSlots); + for (unsigned int i = 0; i < numInputSlots; ++i) + { + m_InputSlots.emplace_back(*this, i); + } + + m_OutputSlots.reserve(numOutputSlots); + for (unsigned int i = 0; i < numOutputSlots; ++i) + { + m_OutputSlots.emplace_back(*this, m_OutputHandlers[i]); + } +} + +void Layer::CollectWorkloadInputs(WorkloadDataCollector& dataCollector, const Graph& graph) const +{ + for (auto&& inputSlot : GetInputSlots()) + { + // The graph must be well-formed at this point + BOOST_ASSERT(inputSlot.GetConnection()); + const OutputHandler& outputHandler = inputSlot.GetConnectedOutputSlot()->GetOutputHandler(); + dataCollector.Push(outputHandler.GetData(), outputHandler.GetTensorInfo()); + } +} + +void Layer::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector, const Graph& graph) const +{ + for (auto&& outputHandler : m_OutputHandlers) + { + outputHandler.CollectWorkloadOutputs(dataCollector); + } +} + +void Layer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) +{ + for (auto&& outputHandler : m_OutputHandlers) + { + outputHandler.CreateTensorHandles(factory); + } +} + +DataType Layer::GetDataType() const +{ + if (GetNumInputSlots() > 0) // Ignore the input layer + { + return GetInputSlot(0).GetConnection()->GetTensorInfo().GetDataType(); + } + return DataType::Float32; +} + +void Layer::ResetPriority() const +{ + m_Priority = 0; + m_Visiting = false; +} + +LayerPriority Layer::GetPriority() const +{ + constexpr LayerPriority inputPrio = std::numeric_limits<LayerPriority>::lowest(); + constexpr LayerPriority outputPrio = std::numeric_limits<LayerPriority>::max(); + + if (GetType() == LayerType::Input) + { + m_Priority = inputPrio; + } + else if (GetType() == LayerType::Output) + { + m_Priority = outputPrio; + } + else if (m_Priority == 0) + { + if (m_Visiting) + { + throw GraphValidationException("Graph has circular dependencies: cannot walk"); + } + + auto maxPrio = [](const LayerPriority prio, const InputSlot& slot) -> LayerPriority + { + const Layer& input = slot.GetConnectedOutputSlot()->GetOwningLayer(); + return std::max(prio, input.GetPriority()); + }; + + m_Visiting = true; + LayerPriority parentPrio = std::accumulate(GetInputSlots().cbegin(), GetInputSlots().cend(), 0U, maxPrio); + m_Visiting = false; + + if (parentPrio >= outputPrio) + { + throw GraphValidationException("Graph has too many edges"); + } + + m_Priority = parentPrio + 1U; + } + + return m_Priority; +} + +} // namespace armnn diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp new file mode 100644 index 0000000000..1160f0ab09 --- /dev/null +++ b/src/armnn/Layer.hpp @@ -0,0 +1,309 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include "LayerFwd.hpp" + +#include "backends/OutputHandler.hpp" +#include "backends/WorkloadDataCollector.hpp" +#include "backends/WorkloadInfo.hpp" +#include "InternalTypes.hpp" + +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/INetwork.hpp> + +#include <algorithm> +#include <memory> +#include <string> +#include <vector> + +#include <boost/numeric/conversion/cast.hpp> +#include <boost/core/ignore_unused.hpp> +#include <boost/cast.hpp> + +namespace armnn +{ + +class IWorkload; +class IWorkloadFactory; +class Layer; +class Graph; + +class InputSlot final : public IInputSlot +{ +public: + explicit InputSlot(Layer& owner, unsigned int slotIndex) + : m_OwningLayer(owner) + , m_Connection(nullptr) + , m_SlotIndex(slotIndex) + {} + + ~InputSlot(); + + Layer& GetOwningLayer() const { return m_OwningLayer; } + unsigned int GetSlotIndex() const { return m_SlotIndex; } + + const OutputSlot* GetConnectedOutputSlot() const { return m_Connection; } + OutputSlot* GetConnectedOutputSlot() { return m_Connection; } + + /// Links the slot to an output slot or breaks an existing link if passing nullptr + void SetConnection(OutputSlot* source) + { + if (m_Connection != nullptr && source != nullptr) + { + throw InvalidArgumentException("Tried to connect an output slot to an input slot, " + "but the latter already has a connection"); + } + m_Connection = source; + } + + // Insert single-output existing layer at this point in the graph. + void Insert(Layer& layer); + + // IInputSlot + + const IOutputSlot* GetConnection() const override; + IOutputSlot* GetConnection() override; + +private: + Layer& m_OwningLayer; + OutputSlot* m_Connection; + const unsigned int m_SlotIndex; +}; + +class OutputSlot final : public IOutputSlot +{ +public: + explicit OutputSlot(Layer& owner, OutputHandler& outputHandler) + : m_OwningLayer(owner) + , m_OutputHandler(outputHandler) + {} + + ~OutputSlot() + { + DisconnectAll(); + } + + Layer& GetOwningLayer() const { return m_OwningLayer; } + + const OutputHandler& GetOutputHandler() const { return m_OutputHandler; } + OutputHandler& GetOutputHandler() { return m_OutputHandler; } + + int Connect(InputSlot& destination); + void Disconnect(InputSlot& slot); + + const std::vector<InputSlot*>& GetConnections() const { return m_Connections; } + + bool ValidateTensorShape(const TensorShape& shape) const; + + // Disconnect all conections + void DisconnectAll(); + + /// Move all connections to another OutputSlot + void MoveAllConnections(OutputSlot& destination); + + // IOutputSlot + + unsigned int GetNumConnections() const override { return boost::numeric_cast<unsigned int>(m_Connections.size()); } + const InputSlot* GetConnection(unsigned int index) const override; + InputSlot* GetConnection(unsigned int index) override; + + void SetTensorInfo(const TensorInfo& tensorInfo) override; + const TensorInfo& GetTensorInfo() const override; + bool IsTensorInfoSet() const override; + + int Connect(IInputSlot& destination) override + { + return Connect(*boost::polymorphic_downcast<InputSlot*>(&destination)); + } + + void Disconnect(IInputSlot& slot) override + { + return Disconnect(*boost::polymorphic_downcast<InputSlot*>(&slot)); + } + +private: + void ValidateConnectionIndex(unsigned int index) const; + + Layer& m_OwningLayer; + OutputHandler& m_OutputHandler; + std::vector<InputSlot*> m_Connections; +}; + +// InputSlot inlines that need OutputSlot declaration + +inline InputSlot::~InputSlot() +{ + if (m_Connection != nullptr) + { + 
m_Connection->Disconnect(*this); + } +} + +inline const IOutputSlot* InputSlot::GetConnection() const { return GetConnectedOutputSlot(); } +inline IOutputSlot* InputSlot::GetConnection() { return GetConnectedOutputSlot(); } + +// Base layer class + +using LayerPriority = unsigned int; + +class Layer : public IConnectableLayer +{ +public: + /// @param name Optional name for the layer (may be nullptr) + Layer(unsigned int numInputSlots, unsigned int numOutputSlots, LayerType type, const char* name); + + const std::string& GetNameStr() const + { + return m_LayerName; + } + + const OutputHandler& GetOutputHandler(unsigned int i = 0) const + { + return m_OutputHandlers[i]; + } + + OutputHandler& GetOutputHandler(unsigned int i = 0) + { + return const_cast<OutputHandler&>(const_cast<const Layer*>(this)->GetOutputHandler(i)); + } + + const std::vector<InputSlot>& GetInputSlots() const { return m_InputSlots; } + const std::vector<OutputSlot>& GetOutputSlots() const { return m_OutputSlots; } + + // Allow non-const access to input slots, but don't expose vector (vector size is fixed at layer construction). + std::vector<InputSlot>::iterator BeginInputSlots() { return m_InputSlots.begin(); } + std::vector<InputSlot>::iterator EndInputSlots() { return m_InputSlots.end(); } + + // Allow non-const access to output slots, but don't expose vector (vector size is fixed at layer construction). + std::vector<OutputSlot>::iterator BeginOutputSlots() { return m_OutputSlots.begin(); } + std::vector<OutputSlot>::iterator EndOutputSlots() { return m_OutputSlots.end(); } + + // Check whether the outputs of this layer don't have any connection + bool IsOutputUnconnected() + { + unsigned int numConnections = 0; + + for (auto&& output : GetOutputSlots()) + { + numConnections += output.GetNumConnections(); + } + + return (GetNumOutputSlots() > 0) && (numConnections == 0); + } + + // Used for sorting + void ResetPriority() const; + LayerPriority GetPriority() const; + + LayerType GetType() const { return m_Type; } + + DataType GetDataType() const; + + Compute GetComputeDevice() const { return m_ComputeDevice; } + void SetComputeDevice(Compute device) { m_ComputeDevice = device; } + + // Virtuals + + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const = 0; + + virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory); + + /// Creates a dynamically-allocated copy of this layer + /// @param graph The Graph into which this Layer is being cloned + virtual Layer* Clone(Graph& graph) const = 0; + + virtual void ValidateTensorShapesFromInputs() = 0; + + // IConnectableLayer + + const char* GetName() const override { return m_LayerName.c_str(); } + + unsigned int GetNumInputSlots() const override { return static_cast<unsigned int>(m_InputSlots.size()); } + unsigned int GetNumOutputSlots() const override { return static_cast<unsigned int>(m_OutputSlots.size()); } + + const InputSlot& GetInputSlot(unsigned int index) const override { return m_InputSlots.at(index); } + InputSlot& GetInputSlot(unsigned int index) override { return m_InputSlots.at(index); } + const OutputSlot& GetOutputSlot(unsigned int index = 0) const override { return m_OutputSlots.at(index); } + OutputSlot& GetOutputSlot(unsigned int index = 0) override { return m_OutputSlots.at(index); } + +protected: + // Graph needs access to the virtual destructor + friend class Graph; + virtual ~Layer() = default; + + template <typename QueueDescriptor> + void 
CollectQueueDescriptorInputs(QueueDescriptor& descriptor, WorkloadInfo& info, const Graph& graph) const + { + WorkloadDataCollector dataCollector(descriptor.m_Inputs, info.m_InputTensorInfos); + CollectWorkloadInputs(dataCollector, graph); + } + + template <typename QueueDescriptor> + void CollectQueueDescriptorOutputs(QueueDescriptor& descriptor, WorkloadInfo& info, const Graph& graph) const + { + WorkloadDataCollector dataCollector(descriptor.m_Outputs, info.m_OutputTensorInfos); + CollectWorkloadOutputs(dataCollector, graph); + } + + /// Helper function to reduce duplication in *Layer::CreateWorkload + template <typename QueueDescriptor> + WorkloadInfo PrepInfoAndDesc(QueueDescriptor& descriptor, const Graph& graph) const + { + WorkloadInfo info; + CollectQueueDescriptorInputs(descriptor, info, graph); + CollectQueueDescriptorOutputs(descriptor, info, graph); + return info; + } + + template <typename LayerType, typename ... Params> + LayerType* CloneBase(Graph& graph, Params&& ... params) const; + +private: + void CollectWorkloadInputs(WorkloadDataCollector& dataCollector, const Graph& graph) const; + void CollectWorkloadOutputs(WorkloadDataCollector& dataCollector, const Graph& graph) const; + +protected: + std::vector<OutputHandler> m_OutputHandlers; + +private: + const std::string m_LayerName; + + std::vector<InputSlot> m_InputSlots; + std::vector<OutputSlot> m_OutputSlots; + + const LayerType m_Type; + Compute m_ComputeDevice; + + /// Used for sorting + mutable LayerPriority m_Priority = 0; + mutable bool m_Visiting = false; +}; + +// A layer user-provided data can be bound to (e.g. inputs, outputs) +class BindableLayer : public Layer +{ +public: + BindableLayer(unsigned int numInputSlots, + unsigned int numOutputSlots, + LayerType type, + const char* name, + LayerBindingId id) + : Layer(numInputSlots, numOutputSlots, type, name) + , m_Id(id) + { + } + + LayerBindingId GetBindingId() const { return m_Id; }; + +protected: + ~BindableLayer() = default; + +private: + LayerBindingId m_Id; +}; + +} diff --git a/src/armnn/LayerFwd.hpp b/src/armnn/LayerFwd.hpp new file mode 100644 index 0000000000..c2f6c7363d --- /dev/null +++ b/src/armnn/LayerFwd.hpp @@ -0,0 +1,13 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +namespace armnn +{ + +class BindableLayer; +class Layer; + +} diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp new file mode 100644 index 0000000000..0567b94905 --- /dev/null +++ b/src/armnn/LayerSupport.cpp @@ -0,0 +1,260 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "armnn/LayerSupport.hpp" + +#include "backends/RefLayerSupport.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/ClLayerSupport.hpp" + +#include <boost/assert.hpp> + +#include <cstring> +#include <algorithm> + +namespace armnn +{ + +// Helper function to copy a full string to a truncated version +void CopyErrorMessage(char* truncatedString, const char* fullString, size_t maxLength) +{ + if(truncatedString != nullptr) + { + size_t copyLength = std::min(maxLength, strlen(fullString)); + std::strncpy(truncatedString, fullString, copyLength); + // Ensure null-terminated string + truncatedString[copyLength] = '\0'; + } +} + +// Helper macro to avoid code duplication. 
+// Forwards function func to funcRef, funcNeon or funcCl, depending on the value of compute +#define FORWARD_LAYER_SUPPORT_FUNC(compute, func, ...) \ + std::string reasonIfUnsupportedFull; \ + bool isSupported; \ + switch(compute) \ + { \ + case Compute::CpuRef: \ + isSupported = func##Ref(__VA_ARGS__, &reasonIfUnsupportedFull); \ + break; \ + case Compute::CpuAcc: \ + isSupported = func##Neon(__VA_ARGS__, &reasonIfUnsupportedFull); \ + break; \ + case Compute::GpuAcc: \ + isSupported = func##Cl(__VA_ARGS__, &reasonIfUnsupportedFull); \ + break; \ + default: \ + isSupported = func##Ref(__VA_ARGS__, &reasonIfUnsupportedFull); \ + break; \ + } \ + CopyErrorMessage(reasonIfUnsupported, reasonIfUnsupportedFull.c_str(), reasonIfUnsupportedMaxLength); \ + return isSupported; + +bool CheckTensorDataTypesEqual(const TensorInfo& input0, const TensorInfo& input1) +{ + return input0.GetDataType() == input1.GetDataType(); +} + +bool IsActivationSupported(Compute compute, + const TensorInfo& input, + const ActivationDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsActivationSupported, input, descriptor); +} + +bool IsAdditionSupported(Compute compute, + const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + if(!CheckTensorDataTypesEqual(input0, input1)) + { + return false; + } + + FORWARD_LAYER_SUPPORT_FUNC(compute, IsAdditionSupported, input0, input1, output); +} + +bool IsBatchNormalizationSupported(Compute compute, + const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsBatchNormalizationSupported, input, descriptor); +} + +bool IsConstantSupported(Compute compute, + const TensorInfo& output, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsConstantSupported, output); +} + +bool IsConvolution2dSupported(Compute compute, + const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsConvolution2dSupported, input, descriptor, weights); +} + +bool IsDepthwiseConvolutionSupported(Compute compute, + const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsDepthwiseConvolutionSupported, input, descriptor, weights); +} + +bool IsInputSupported(Compute compute, + const TensorInfo& input, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsInputSupported, input); +} + +bool IsFullyConnectedSupported(Compute compute, + const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsFullyConnectedSupported, input, descriptor); +} + +bool IsL2NormalizationSupported(Compute compute, + const TensorInfo& input, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsL2NormalizationSupported, input); +} + +bool IsMergerSupported(Compute compute, + std::vector<const TensorInfo*> inputs, + const 
OriginsDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + BOOST_ASSERT(inputs.size() > 0); + FORWARD_LAYER_SUPPORT_FUNC(compute, IsMergerSupported, inputs, descriptor); +} + +bool IsMultiplicationSupported(Compute compute, + const TensorInfo& input0, + const TensorInfo& input1, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsMultiplicationSupported, input0, input1); +} + +bool IsNormalizationSupported(Compute compute, + const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsNormalizationSupported, input, output, descriptor); +} + +bool IsOutputSupported(Compute compute, + const TensorInfo& output, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsOutputSupported, output); +} + +bool IsPermuteSupported(Compute compute, + const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsPermuteSupported, input, output, descriptor); +} + +bool IsPooling2dSupported(Compute compute, + const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsPooling2dSupported, input, output, descriptor); +} + +bool IsResizeBilinearSupported(Compute compute, + const TensorInfo& input, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsResizeBilinearSupported, input); +} + +bool IsSoftmaxSupported(Compute compute, + const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsSoftmaxSupported, input, descriptor); +} + +bool IsSplitterSupported(Compute compute, + const TensorInfo& input, + const ViewsDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsSplitterSupported, input, descriptor); +} + +bool IsFakeQuantizationSupported(Compute compute, + const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsFakeQuantizationSupported, input, descriptor); +} + +bool IsReshapeSupported(Compute compute, + const TensorInfo& input, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(compute, IsReshapeSupported, input); +} + +bool IsFloorSupported(Compute compute, + const TensorInfo& input, + const TensorInfo& output, + char* reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + // By definition (that is, regardless of compute device), shapes and data type must match + if (input.GetShape() != output.GetShape() || input.GetDataType() != output.GetDataType()) + { + return false; + } + + FORWARD_LAYER_SUPPORT_FUNC(compute, IsFloorSupported, input, output); +} + +} diff --git a/src/armnn/LayerSupportCommon.hpp b/src/armnn/LayerSupportCommon.hpp new file mode 100644 index 0000000000..5b7feac387 --- /dev/null +++ b/src/armnn/LayerSupportCommon.hpp @@ -0,0 +1,64 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +template<typename Float32Func, typename Uint8Func, typename ... Params> +bool IsSupportedForDataTypeGeneric(std::string* reasonIfUnsupported, + DataType dataType, + Float32Func floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... params) +{ + switch(dataType) + { + case DataType::Float32: + return floatFuncPtr(reasonIfUnsupported, std::forward<Params>(params)...); + case DataType::QuantisedAsymm8: + return uint8FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...); + default: + return false; + } +} + +template<typename ... Params> +bool TrueFunc(std::string* reasonIfUnsupported, Params&&... params) +{ + return true; +} + +template<typename ... Params> +bool FalseFunc(std::string* reasonIfUnsupported, Params&&... params) +{ + return false; +} + +template<typename ... Params> +bool FalseFuncF32(std::string* reasonIfUnsupported, Params&&... params) +{ + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Layer is not supported with float32 data type"; + } + return false; +} + +template<typename ... Params> +bool FalseFuncU8(std::string* reasonIfUnsupported, Params&&... params) +{ + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Layer is not supported with 8-bit data type"; + } + return false; +} + +} diff --git a/src/armnn/Layers.cpp b/src/armnn/Layers.cpp new file mode 100644 index 0000000000..ddbc7d222c --- /dev/null +++ b/src/armnn/Layers.cpp @@ -0,0 +1,986 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "Layers.hpp" +#include "Graph.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/Workload.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "Permute.hpp" + + +namespace armnn +{ + +template <typename LayerType, typename ... Params> +LayerType* Layer::CloneBase(Graph& graph, Params&& ... 
params) const +{ + LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...); + + layer->SetComputeDevice(m_ComputeDevice); + + return layer; +} + +ActivationLayer::ActivationLayer(const ActivationDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::Activation, param, name) +{ +} + +std::unique_ptr<IWorkload> ActivationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + ActivationQueueDescriptor descriptor; + return factory.CreateActivation(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +ActivationLayer* ActivationLayer::Clone(Graph& graph) const +{ + return CloneBase<ActivationLayer>(graph, m_Param, GetName()); +} + +void ActivationLayer::ValidateTensorShapesFromInputs() +{ + auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo(); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()), + "ActivationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +AdditionLayer::AdditionLayer(const char* name) + : Layer(2, 1, LayerType::Addition, name) +{ +} + +std::unique_ptr<IWorkload> AdditionLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + AdditionQueueDescriptor descriptor; + return factory.CreateAddition(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +AdditionLayer* AdditionLayer::Clone(Graph& graph) const +{ + return CloneBase<AdditionLayer>(graph, GetName()); +} + +void AdditionLayer::ValidateTensorShapesFromInputs() +{ + auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo(); + auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo(); + + // Get the max of the inputs + BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions()); + unsigned int numDims = input0.GetNumDimensions(); + std::vector<unsigned int> dims(numDims); + + // validate inputs are broadcast compatible +#if !NDEBUG + for (unsigned int i = 0; i < numDims; i++) + { + unsigned int dim0 = input0.GetShape()[i]; + unsigned int dim1 = input1.GetShape()[i]; + if (dim0 != dim1) + { + BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be one length"); + } + } +#endif + + + for (unsigned int i = 0; i < numDims; i++) + { + unsigned int dim0 = input0.GetShape()[i]; + unsigned int dim1 = input1.GetShape()[i]; + dims[i] = std::max(dim0, dim1); + } + + TensorShape outShape(numDims, dims.data()); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "AdditionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +BatchNormalizationLayer::BatchNormalizationLayer(const armnn::BatchNormalizationDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::BatchNormalization, param, name) +{ +} + +std::unique_ptr<IWorkload> BatchNormalizationLayer::CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const +{ + BatchNormalizationQueueDescriptor descriptor; + + descriptor.m_Mean = m_Mean.get(); + descriptor.m_Variance = m_Variance.get(); + descriptor.m_Beta = m_Beta.get(); + descriptor.m_Gamma = m_Gamma.get(); + return factory.CreateBatchNormalization(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +BatchNormalizationLayer* BatchNormalizationLayer::Clone(Graph& graph) const +{ + auto layer = CloneBase<BatchNormalizationLayer>(graph, m_Param, GetName()); + + layer->m_Mean = m_Mean ? 
std::make_unique<ScopedCpuTensorHandle>(*m_Mean) : nullptr; + layer->m_Variance = m_Variance ? std::make_unique<ScopedCpuTensorHandle>(*m_Variance) : nullptr; + layer->m_Beta = m_Beta ? std::make_unique<ScopedCpuTensorHandle>(*m_Beta) : nullptr; + layer->m_Gamma = m_Gamma ? std::make_unique<ScopedCpuTensorHandle>(*m_Gamma) : nullptr; + + return std::move(layer); +} + +void BatchNormalizationLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "BatchNormalizationLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "BatchNormalizationLayer: TensorInfo must be set on connected OutputSlot."); + + auto& info = GetInputSlot(0).GetConnection()->GetTensorInfo(); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(info.GetShape()), + "BatchNormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +Convolution2dLayer::Convolution2dLayer(const Convolution2dDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::Convolution2d, param, name) +{ +} + +std::unique_ptr<IWorkload> Convolution2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + Convolution2dQueueDescriptor descriptor; + + descriptor.m_Weight = m_Weight.get(); + if (m_Param.m_BiasEnabled) + { + descriptor.m_Bias = m_Bias.get(); + } + return factory.CreateConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +Convolution2dLayer* Convolution2dLayer::Clone(Graph& graph) const +{ + auto layer = CloneBase<Convolution2dLayer>(graph, m_Param, GetName()); + layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr; + + if (layer->m_Param.m_BiasEnabled) + { + layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr; + } + + return std::move(layer); +} + +void Convolution2dLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "Convolution2dLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "Convolution2dLayer: TensorInfo must be set on connected OutputSlot."); + + + IOutputSlot* input = GetInputSlot(0).GetConnection(); + const TensorShape& inputShape = input->GetTensorInfo().GetShape(); + const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape(); + + // If we support multiple batch dimensions in the future, then this assert will need to change. 
+ BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input."); + + unsigned int inWidth = inputShape[3]; + unsigned int inHeight = inputShape[2]; + unsigned int inBatchSize = inputShape[0]; + + unsigned int filterWidth = filterShape[3]; + unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth); + unsigned int outWidth = 1+(readWidth / m_Param.m_StrideX); + + unsigned int filterHeight = filterShape[2]; + unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight); + unsigned int outHeight = 1+(readHeight / m_Param.m_StrideY); + + unsigned int outChannels = filterShape[0]; + unsigned int outBatchSize = inBatchSize; + + TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth}); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut), + "Convolution2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + + +DepthwiseConvolution2dLayer::DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param, + const char* name) + : LayerWithParameters(1, 1, LayerType::DepthwiseConvolution2d, param, name) +{ +} + +std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const +{ + DepthwiseConvolution2dQueueDescriptor descriptor; + + descriptor.m_Weight = m_Weight.get(); + if (m_Param.m_BiasEnabled) + { + descriptor.m_Bias = m_Bias.get(); + } + return factory.CreateDepthwiseConvolution2d(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +DepthwiseConvolution2dLayer* DepthwiseConvolution2dLayer::Clone(Graph& graph) const +{ + auto layer = CloneBase<DepthwiseConvolution2dLayer>(graph, m_Param, GetName()); + layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr; + + if (layer->m_Param.m_BiasEnabled) + { + layer->m_Bias = m_Bias ? 
std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr; + } + + return std::move(layer); +} + +void DepthwiseConvolution2dLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "DepthwiseConvolution2dLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "DepthwiseConvolution2dLayer: TensorInfo must be set on connected OutputSlot."); + + IOutputSlot* input = GetInputSlot(0).GetConnection(); + const TensorShape& inputShape = input->GetTensorInfo().GetShape(); + const TensorShape filterShape = m_Weight->GetTensorInfo().GetShape(); + + BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Convolutions will always have 4D input."); + + unsigned int inWidth = inputShape[3]; + unsigned int inHeight = inputShape[2]; + unsigned int inBatchSize = inputShape[0]; + + unsigned int filterWidth = filterShape[3]; + unsigned int readWidth = (inWidth + m_Param.m_PadLeft + m_Param.m_PadRight) - (filterWidth); + unsigned int outWidth = 1+(readWidth / m_Param.m_StrideX); + + unsigned int filterHeight = filterShape[2]; + unsigned int readHeight = (inHeight + m_Param.m_PadTop + m_Param.m_PadBottom) - (filterHeight); + unsigned int outHeight = 1+(readHeight / m_Param.m_StrideY); + unsigned int depthMultiplier = filterShape[0]; + + unsigned int outChannels = filterShape[1]*depthMultiplier; + unsigned int outBatchSize = inBatchSize; + + TensorShape outShape({outBatchSize, outChannels, outHeight, outWidth}); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "DepthwiseConvolution2dLayer: " + "TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +FakeQuantizationLayer::FakeQuantizationLayer(const FakeQuantizationDescriptor& param, const char* name) +: LayerWithParameters(1, 1, LayerType::FakeQuantization, param, name) +{ +} + +std::unique_ptr<IWorkload> FakeQuantizationLayer::CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const +{ + FakeQuantizationQueueDescriptor descriptor; + return factory.CreateFakeQuantization(descriptor, PrepInfoAndDesc(descriptor, graph) ); +} + +FakeQuantizationLayer* FakeQuantizationLayer::Clone(Graph& graph) const +{ + return CloneBase<FakeQuantizationLayer>(graph, m_Param, GetName()); +} + +void FakeQuantizationLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "FakeQuantizationLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "FakeQuantizationLayer: TensorInfo must be set on connected OutputSlot."); + + + IOutputSlot* input = GetInputSlot(0).GetConnection(); + + // input and output shapes are the same + TensorShape const& outShape = input->GetTensorInfo().GetShape(); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "FakeQuantizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +FloorLayer::FloorLayer(const char* name) + : Layer(1, 1, LayerType::Floor, name) +{ +} + +std::unique_ptr<IWorkload> FloorLayer::CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const +{ + FloorQueueDescriptor descriptor; + return factory.CreateFloor(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +FloorLayer* FloorLayer::Clone(Graph& graph) const +{ + 
return CloneBase<FloorLayer>(graph, GetName()); +} + +void FloorLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "FloorLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "FloorLayer: TensorInfo must be set on connected OutputSlot."); + + // input and output shapes are the same + IOutputSlot* input = GetInputSlot(0).GetConnection(); + TensorShape const& outShape = input->GetTensorInfo().GetShape(); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "FloorLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::FullyConnected, param, name) +{ +} + +std::unique_ptr<IWorkload> FullyConnectedLayer::CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const +{ + FullyConnectedQueueDescriptor descriptor; + + descriptor.m_Weight = m_Weight.get(); + if (m_Param.m_BiasEnabled) + { + descriptor.m_Bias = m_Bias.get(); + } + return factory.CreateFullyConnected(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const +{ + auto layer = CloneBase<FullyConnectedLayer>(graph, m_Param, GetName()); + + layer->m_Weight = m_Weight ? std::make_unique<ScopedCpuTensorHandle>(*m_Weight) : nullptr; + if (layer->m_Param.m_BiasEnabled) + { + layer->m_Bias = m_Bias ? std::make_unique<ScopedCpuTensorHandle>(*m_Bias) : nullptr; + } + + return std::move(layer); +} + +void FullyConnectedLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "FullyConnectedLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "FullyConnectedLayer: TensorInfo must be set on connected OutputSlot."); + + + TensorShape const& weightShape = m_Weight->GetTensorInfo().GetShape(); + + // output for FC is [1, w[1]] + unsigned int batches = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()[0]; + unsigned int dimIdx = m_Param.m_TransposeWeightMatrix ? 0 : 1; + TensorShape outShape({batches, weightShape[dimIdx]}); + + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "FullyConnectedLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +InputLayer::InputLayer(LayerBindingId id, const char* name) + : BindableLayer(0, 1, LayerType::Input, name, id) +{ +} + +std::unique_ptr<IWorkload> InputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + return nullptr; +} + +InputLayer* InputLayer::Clone(Graph& graph) const +{ + return CloneBase<InputLayer>(graph, GetBindingId(), GetName()); +} + +void InputLayer::ValidateTensorShapesFromInputs() +{ + //The input layer should already have it's inputs set during graph building phase in the driver/parser. 
+ ConditionalThrow<LayerValidationException>(GetOutputSlot(0).IsTensorInfoSet(),
+ "InputLayer should already have the TensorInfo set.");
+}
+
+
+MergerLayer::MergerLayer(const OriginsDescriptor& param, const char* name)
+ : LayerWithParameters(param.GetNumViews(), 1, LayerType::Merger, param, name)
+{
+}
+
+std::unique_ptr<IWorkload> MergerLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
+{
+ MergerQueueDescriptor descriptor;
+
+ // copy the view origins to the descriptor
+ descriptor.m_ViewOrigins.reserve(m_Param.GetNumViews());
+ for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
+ {
+ descriptor.m_ViewOrigins.emplace_back(
+ std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
+ }
+
+ return factory.CreateMerger(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+{
+ // If sub-tensors are supported then the merger just needs to make sure that the outputs
+ // of the previous layer are made sub-tensors of the output of the merger layer.
+ m_OutputHandlers[0].CreateTensorHandles(factory);
+ if (factory.SupportsSubTensors())
+ {
+ const unsigned int numInputSlots = GetNumInputSlots();
+ for (unsigned int i = 0; i < numInputSlots; ++i)
+ {
+ OutputHandler& outputHandler = GetInputSlot(i).GetConnectedOutputSlot()->GetOutputHandler();
+
+ outputHandler.SetData(factory.CreateSubTensorHandle(*m_OutputHandlers[0].GetData(),
+ outputHandler.GetTensorInfo().GetShape(),
+ m_Param.GetViewOrigin(i)));
+ }
+ }
+}
+
+MergerLayer* MergerLayer::Clone(Graph& graph) const
+{
+ return CloneBase<MergerLayer>(graph, m_Param, GetName());
+}
+
+void MergerLayer::ValidateTensorShapesFromInputs()
+{
+ // Validate Merger layer
+ ConditionalThrow<LayerValidationException>(m_Param.GetNumViews() == GetNumInputSlots(),
+ "MergerLayer: Num Inputs must match num views.");
+
+ unsigned int numDims = m_Param.GetNumDimensions();
+ for (unsigned int i=0; i<GetNumInputSlots(); i++)
+ {
+ auto& inputInfo = GetInputSlot(i).GetConnection()->GetTensorInfo();
+
+ boost::ignore_unused(inputInfo);
+ ConditionalThrow<LayerValidationException>(numDims == inputInfo.GetNumDimensions(),
+ "MergerLayer: Num Dimensions must match all inputs.");
+ }
+
+ // Find the bounding box (extents) of all the views
+ std::vector<unsigned int> extentMin(numDims);
+ std::vector<unsigned int> extentMax(numDims);
+ for (unsigned int i = 0; i < GetNumInputSlots(); i++)
+ {
+ const uint32_t* origin = m_Param.GetViewOrigin(i);
+ const armnn::TensorShape& shape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
+ for (unsigned int d = 0; d < numDims; d++)
+ {
+ extentMin[d] = std::min(extentMin[d], origin[d]);
+ extentMax[d] = std::max(extentMax[d], origin[d] + shape[d]);
+ }
+ }
+
+ // Check that the bounding box starts at the origin
+ if (!std::all_of(extentMin.begin(), extentMin.end(), [](unsigned int s) { return s == 0; }))
+ {
+ throw LayerValidationException("MergerLayer: there is no view that starts at the origin");
+ }
+
+ // Check that there are no overlaps of views (this would lead to undefined output at those locations).
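+ // (As a purely hypothetical illustration of the per-axis test below: two 2-D views with origins {0,0} and
+ // {0,2}, both of shape {2,2}, overlap on axis 0 (rows [0,2) vs [0,2)) but not on axis 1 (columns [0,2) vs
+ // [2,4)), so the pair does not count as overlapping.)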
+ // Check each pair of views against each other + // (and don't bother to check against self, or check the same pair both ways round) + for (unsigned int a = 0; a < GetNumInputSlots(); a++) + { + const uint32_t* aOrigin = m_Param.GetViewOrigin(a); + const armnn::TensorShape& aShape = GetInputSlot(a).GetConnection()->GetTensorInfo().GetShape(); + for (unsigned int b = 0; b < a; b++) + { + const uint32_t* bOrigin = m_Param.GetViewOrigin(b); + const armnn::TensorShape& bShape = GetInputSlot(b).GetConnection()->GetTensorInfo().GetShape(); + + bool allAxesOverlap = true; + for (unsigned int d = 0; d < numDims && allAxesOverlap; d++) + { + unsigned int a1 = aOrigin[d]; + unsigned int a2 = aOrigin[d] + aShape[d]; + + unsigned int b1 = bOrigin[d]; + unsigned int b2 = bOrigin[d] + bShape[d]; + + if (a2 <= b1 || b2 <= a1) + { + allAxesOverlap = false; + } + } + if (allAxesOverlap) + { + throw LayerValidationException("MergerLayer: Some views overlap."); + } + } + } + + // Check that there are no "holes", i.e. regions of the output which is not covered by a view. + // Because we already checked that there are no overlaps, this can be done simply by checking that + // the total 'volume' of the views is the same as the output. + unsigned int totalViewsVolume = 0; + for (unsigned int i = 0; i < GetNumInputSlots(); i++) + { + totalViewsVolume += GetInputSlot(i).GetConnection()->GetTensorInfo().GetNumElements(); + } + unsigned int outputVolume = 1; + for (unsigned int d = 0; d < numDims; d++) + { + outputVolume *= (extentMax[d] - extentMin[d]); + } + if (totalViewsVolume != outputVolume) + { + throw LayerValidationException("MergerLayer: there are some gaps between views"); + } + + TensorShape outShape(numDims, extentMax.data()); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "MergerLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +MultiplicationLayer::MultiplicationLayer(const char* name) + : Layer(2, 1, LayerType::Multiplication, name) +{ +} + +std::unique_ptr<IWorkload> MultiplicationLayer::CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const +{ + MultiplicationQueueDescriptor descriptor; + + return factory.CreateMultiplication(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +MultiplicationLayer* MultiplicationLayer::Clone(Graph& graph) const +{ + return CloneBase<MultiplicationLayer>(graph, GetName()); +} + +void MultiplicationLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() == + GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape(), + "MultiplicationLayer: Inputs must match"); + + TensorInfo infoOut(GetInputSlot(0).GetConnection()->GetTensorInfo()); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(infoOut.GetShape()), + "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +NormalizationLayer::NormalizationLayer(const NormalizationDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::Normalization, param, name) +{ +} + +std::unique_ptr<IWorkload> NormalizationLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + NormalizationQueueDescriptor descriptor; + return factory.CreateNormalization(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +NormalizationLayer* NormalizationLayer::Clone(Graph& graph) const +{ + return CloneBase<NormalizationLayer>(graph, 
m_Param, GetName()); +} + +void NormalizationLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "NormalizationLayer: Input slot must be connected."); + + const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +OutputLayer::OutputLayer(LayerBindingId id, const char* name) + : BindableLayer(1, 0, LayerType::Output, name, id) +{ +} + +std::unique_ptr<IWorkload> OutputLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + return nullptr; +} + +OutputLayer* OutputLayer::Clone(Graph& graph) const +{ + return CloneBase<OutputLayer>(graph, GetBindingId(), GetName()); +} + +void OutputLayer::ValidateTensorShapesFromInputs() +{ + // Just validate the input is connected + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "OutputLayer: Input slot must be connected."); +} + +PermuteLayer::PermuteLayer(const PermuteDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::Permute, param, name) +{ +} + +std::unique_ptr<IWorkload> PermuteLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + PermuteQueueDescriptor descriptor; + return factory.CreatePermute(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +PermuteLayer* PermuteLayer::Clone(Graph& graph) const +{ + return CloneBase<PermuteLayer>(graph, m_Param, GetName()); +} + +void PermuteLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "PermuteLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "PermuteLayer: TensorInfo must be set on connected InputSlot."); + + const TensorInfo& infoIn = GetInputSlot(0).GetConnection()->GetTensorInfo(); + TensorShape shapeOut = armnnUtils::Permuted(infoIn.GetShape(), m_Param.m_DimMappings); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut), + "PermuteLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +Pooling2dLayer::Pooling2dLayer(const Pooling2dDescriptor& param, const char* name) + : LayerWithParameters(1, 1, LayerType::Pooling2d, param, name) +{ +} + +std::unique_ptr<IWorkload> Pooling2dLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + Pooling2dQueueDescriptor descriptor; + return factory.CreatePooling2d(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +Pooling2dLayer* Pooling2dLayer::Clone(Graph& graph) const +{ + return CloneBase<Pooling2dLayer>(graph, m_Param, GetName()); +} + +void Pooling2dLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "Pooling2dLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "Pooling2dLayer: TensorInfo must be set on connected InputSlot."); + + IOutputSlot* input = GetInputSlot(0).GetConnection(); + const TensorShape& inputShape = input->GetTensorInfo().GetShape(); + + // If we support multiple batch dimensions in the future, then this assert will need to change. 
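+ // A purely illustrative example of the CalcSize arithmetic below (hypothetical values): with inSize = 7,
+ // no padding, poolSize = 2 and stride = 2, readSize = 5 and div = 2.5, so Floor rounding yields
+ // floor(2.5) + 1 = 3 while Ceiling yields ceil(2.5) + 1 = 4; the final adjustment then drops one position
+ // only if the last pooling window would start outside the input, i.e. entirely in the padding.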
+ BOOST_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Pooling2dLayer will always have 4D input."); + + + unsigned int inWidth = inputShape[3]; + unsigned int inHeight = inputShape[2]; + unsigned int inChannels = inputShape[1]; + unsigned int inBatchSize = inputShape[0]; + + bool isGlobalPooling = (m_Param.m_StrideX==0 && m_Param.m_StrideY==0); + unsigned int outWidth = 1; + unsigned int outHeight = 1; + if (!isGlobalPooling) + { + BOOST_ASSERT_MSG(m_Param.m_StrideX!=0 && m_Param.m_StrideY!=0, + "Stride can only be zero when performing global pooling"); + + auto CalcSize = [](auto inSize, auto lowPad, auto highPad, auto poolSize, auto stride, auto padMethod, + auto outputShapeRounding) + { + unsigned int readSize = inSize + lowPad + highPad - poolSize; + float div = static_cast<float>(readSize) / static_cast<float>(stride); + + unsigned int size = 0; + switch (outputShapeRounding) + { + case OutputShapeRounding::Ceiling: + size = static_cast<unsigned int>(ceil(div)) + 1; + break; + case OutputShapeRounding ::Floor: + size = static_cast<unsigned int>(floor(div)) + 1; + break; + default: + BOOST_ASSERT_MSG(false, "Unsupported Output Shape Rounding"); + } + + // Make sure that border operations will start from inside the input and not the padded area + // This is what both Caffe and CL does... + if ((size - 1)*stride >= inSize + lowPad) + { + --size; + } + + return size; + }; + + outWidth = CalcSize(inWidth, m_Param.m_PadLeft, m_Param.m_PadRight, m_Param.m_PoolWidth, m_Param.m_StrideX, + m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding); + outHeight= CalcSize(inHeight, m_Param.m_PadTop, m_Param.m_PadBottom, m_Param.m_PoolHeight, m_Param.m_StrideY, + m_Param.m_PaddingMethod, m_Param.m_OutputShapeRounding); + + + } + unsigned int outChannels = inChannels; + unsigned int outBatchSize = inBatchSize; + + TensorShape shapeOut({outBatchSize, outChannels, outHeight, outWidth}); + + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(shapeOut), + "Pooling2dLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +SoftmaxLayer::SoftmaxLayer(const SoftmaxDescriptor ¶m, const char* name) + : LayerWithParameters(1, 1, LayerType::Softmax, param, name) +{ +} + +std::unique_ptr<IWorkload> SoftmaxLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + SoftmaxQueueDescriptor descriptor; + return factory.CreateSoftmax(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +SoftmaxLayer* SoftmaxLayer::Clone(Graph& graph) const +{ + return CloneBase<SoftmaxLayer>(graph, m_Param, GetName()); +} + +void SoftmaxLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "SoftmaxLayer: Input slot must be connected."); + const TensorShape& outShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape), + "SoftmaxLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +SplitterLayer::SplitterLayer(const ViewsDescriptor& param, const char* name) + : LayerWithParameters(1, param.GetNumViews(), LayerType::Splitter, param, name) +{ +} + +std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + SplitterQueueDescriptor descriptor; + + // copy the window origins to the descriptor + for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i) + { + 
descriptor.m_ViewOrigins.emplace_back(
+ std::vector<unsigned int>(m_Param.GetViewOrigin(i), m_Param.GetViewOrigin(i) + m_Param.GetNumDimensions()));
+ }
+
+ return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+{
+ // If sub-tensors are supported then all the splitter needs to do is
+ // set the outputs to be appropriate sub-tensors of the input.
+ if (factory.SupportsSubTensors())
+ {
+ const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
+
+ ITensorHandle* inputData = outputHandler.GetData();
+ // Create the outputs as sub-tensors of the input
+ for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
+ {
+ m_OutputHandlers[i].SetData(factory.CreateSubTensorHandle(*inputData,
+ m_OutputHandlers[i].GetTensorInfo().GetShape(),
+ m_Param.GetViewOrigin(i)));
+ }
+ }
+ else
+ {
+ for (unsigned int i = 0; i < m_Param.GetNumViews(); ++i)
+ {
+ m_OutputHandlers[i].CreateTensorHandles(factory);
+ }
+ }
+}
+
+SplitterLayer* SplitterLayer::Clone(Graph& graph) const
+{
+ return CloneBase<SplitterLayer>(graph, m_Param, GetName());
+}
+
+void SplitterLayer::ValidateTensorShapesFromInputs()
+{
+ // Output shapes must match View shapes.
+ for (unsigned int viewIdx = 0; viewIdx < m_Param.GetNumViews(); viewIdx++)
+ {
+ const uint32_t* sizes = m_Param.GetViewSizes(viewIdx);
+
+ TensorShape outShape(m_Param.GetNumDimensions(), sizes);
+ ConditionalThrow<LayerValidationException>(GetOutputSlot(viewIdx).ValidateTensorShape(outShape),
+ "SplitterLayer: View sizes must match output tensor shapes.");
+ }
+}
+
+MemCopyLayer::MemCopyLayer(const char* name)
+ : Layer(1, 1, LayerType::MemCopy, name)
+{
+}
+
+MemCopyLayer* MemCopyLayer::Clone(Graph& graph) const
+{
+ return CloneBase<MemCopyLayer>(graph, GetName());
+}
+
+std::unique_ptr<IWorkload> MemCopyLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
+{
+ MemCopyQueueDescriptor descriptor;
+ return factory.CreateMemCopy(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+void MemCopyLayer::ValidateTensorShapesFromInputs()
+{
+ ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
+ "MemCopyLayer: InputSlot must be connected to an OutputSlot");
+ ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
+ "MemCopyLayer: TensorInfo must be set on connected OutputSlot.");
+
+
+ IOutputSlot* input = GetInputSlot(0).GetConnection();
+
+ // input and output shapes are the same
+ TensorShape const& outShape = input->GetTensorInfo().GetShape();
+ ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
+ "MemCopyLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
+}
+
+ResizeBilinearLayer::ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name)
+ : LayerWithParameters(1, 1, LayerType::ResizeBilinear, param, name)
+{
+}
+
+std::unique_ptr<IWorkload> ResizeBilinearLayer::CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const
+{
+ ResizeBilinearQueueDescriptor descriptor;
+ return factory.CreateResizeBilinear(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+ResizeBilinearLayer* ResizeBilinearLayer::Clone(Graph& graph) const
+{
+ return CloneBase<ResizeBilinearLayer>(graph, m_Param, GetName());
+}
+
+void ResizeBilinearLayer::ValidateTensorShapesFromInputs()
+{
+ ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
+ "ResizeBilinearLayer: InputSlot must be connected to an OutputSlot");
+ ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
+ "ResizeBilinearLayer: TensorInfo must be set on connected OutputSlot.");
+
+ const TensorShape& inputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
+ unsigned int outWidth = m_Param.m_TargetWidth;
+ unsigned int outHeight = m_Param.m_TargetHeight;
+ unsigned int outChannels = inputShape[1];
+ unsigned int outBatch = inputShape[0];
+ TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
+ ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
+ "ResizeBilinearLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
+}
+
+L2NormalizationLayer::L2NormalizationLayer(const char* name)
+ : Layer(1, 1, LayerType::L2Normalization, name)
+{
+}
+
+std::unique_ptr<IWorkload> L2NormalizationLayer::CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const
+{
+ L2NormalizationQueueDescriptor descriptor;
+ return factory.CreateL2Normalization(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
+{
+ return CloneBase<L2NormalizationLayer>(graph, GetName());
+}
+
+void L2NormalizationLayer::ValidateTensorShapesFromInputs()
+{
+ ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr,
+ "L2NormalizationLayer: InputSlot must be connected to an OutputSlot");
+ ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(),
+ "L2NormalizationLayer: TensorInfo must be set on connected OutputSlot.");
+
+ IOutputSlot* input = GetInputSlot(0).GetConnection();
+
+ // input and output shapes are the same
+ TensorShape const& outShape = input->GetTensorInfo().GetShape();
+ ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
+ "L2NormalizationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
+}
+
+ConstantLayer::ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name)
+ : Layer(0, 1, LayerType::Constant, name)
+ , m_LayerOutput(input)
+{
+}
+
+std::unique_ptr<IWorkload> ConstantLayer::CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const
+{
+ ConstantQueueDescriptor descriptor;
+ descriptor.m_LayerOutput = m_LayerOutput.get();
+ return factory.CreateConstant(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+ConstantLayer* ConstantLayer::Clone(Graph& graph) const
+{
+ // Cloned layers share the same layer output object
+ return CloneBase<ConstantLayer>(graph, m_LayerOutput, GetName());
+}
+
+void ConstantLayer::ValidateTensorShapesFromInputs()
+{
+ // get the output shape from the value of the constant layer
+ TensorShape const& outShape = m_LayerOutput->GetTensorInfo().GetShape();
+ ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
+ "ConstantLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
+}
+
+ReshapeLayer::ReshapeLayer(const ReshapeDescriptor& param, const char* name)
+ : LayerWithParameters(1, 1, LayerType::Reshape, param, name)
+{
+}
+
+std::unique_ptr<IWorkload> ReshapeLayer::CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const
+{
+ ReshapeQueueDescriptor descriptor;
+ return factory.CreateReshape(descriptor, 
PrepInfoAndDesc(descriptor, graph)); +} + +ReshapeLayer* ReshapeLayer::Clone(Graph& graph) const +{ + return CloneBase<ReshapeLayer>(graph, m_Param, GetName()); +} + +void ReshapeLayer::ValidateTensorShapesFromInputs() +{ + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection() != nullptr, + "ReshapeLayer: InputSlot must be connected to an OutputSlot"); + ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->IsTensorInfoSet(), + "ReshapeLayer: TensorInfo must be set on connected OutputSlot."); + ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(m_Param.m_TargetShape), + "ReshapeLayer: TensorShape set on OutputSlot[0] does not match the inferred shape."); +} + +} diff --git a/src/armnn/Layers.hpp b/src/armnn/Layers.hpp new file mode 100644 index 0000000000..5a1e3ca063 --- /dev/null +++ b/src/armnn/Layers.hpp @@ -0,0 +1,430 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "LayersFwd.hpp" + +#include "Layer.hpp" +#include "InternalTypes.hpp" + +#include <armnn/Descriptors.hpp> + +#include <boost/core/ignore_unused.hpp> + +namespace armnn +{ + +class ScopedCpuTensorHandle; + +template <typename Parameters> +class LayerWithParameters : public Layer +{ +public: + typedef Parameters DescriptorType; + + const Parameters& GetParameters() const { return m_Param; } + +protected: + LayerWithParameters(unsigned int numInputSlots, + unsigned int numOutputSlots, + LayerType type, + const Parameters& param, + const char* name) + : Layer(numInputSlots, numOutputSlots, type, name) + , m_Param(param) + { + } + + ~LayerWithParameters() = default; + + /// Helper function to reduce duplication in *Layer::CreateWorkload + template <typename QueueDescriptor> + WorkloadInfo PrepInfoAndDesc(QueueDescriptor& descriptor, const Graph& graph) const + { + descriptor.m_Parameters = m_Param; + return Layer::PrepInfoAndDesc(descriptor, graph); + } + + /// The parameters for the layer (not including tensor-valued weights etc.) 
+ Parameters m_Param; +}; + +class ActivationLayer : public LayerWithParameters<ActivationDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + ActivationLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + ActivationLayer(const ActivationDescriptor ¶m, const char* name); + ~ActivationLayer() = default; +}; + +class AdditionLayer : public Layer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + AdditionLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + AdditionLayer(const char* name); + ~AdditionLayer() = default; +}; + +class BatchNormalizationLayer : public LayerWithParameters<BatchNormalizationDescriptor> +{ +public: + std::unique_ptr<ScopedCpuTensorHandle> m_Mean; + std::unique_ptr<ScopedCpuTensorHandle> m_Variance; + std::unique_ptr<ScopedCpuTensorHandle> m_Beta; + std::unique_ptr<ScopedCpuTensorHandle> m_Gamma; + + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + BatchNormalizationLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + BatchNormalizationLayer(const BatchNormalizationDescriptor& param, const char* name); + ~BatchNormalizationLayer() = default; +}; + +class Convolution2dLayer : public LayerWithParameters<Convolution2dDescriptor> +{ +public: + std::unique_ptr<ScopedCpuTensorHandle> m_Weight; + std::unique_ptr<ScopedCpuTensorHandle> m_Bias; + + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + Convolution2dLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + Convolution2dLayer(const Convolution2dDescriptor& param, const char* name); + ~Convolution2dLayer() = default; +}; + +class DepthwiseConvolution2dLayer : public LayerWithParameters<DepthwiseConvolution2dDescriptor> +{ +public: + std::unique_ptr<ScopedCpuTensorHandle> m_Weight; + std::unique_ptr<ScopedCpuTensorHandle> m_Bias; + + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + DepthwiseConvolution2dLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + DepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor& param, const char* name); + ~DepthwiseConvolution2dLayer() = default; +}; + +class FakeQuantizationLayer : public LayerWithParameters<FakeQuantizationDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + FakeQuantizationLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + FakeQuantizationLayer(const FakeQuantizationDescriptor& descriptor, const char* name); + ~FakeQuantizationLayer() = default; +}; + +class FloorLayer : public Layer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + FloorLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + FloorLayer(const char* name); + ~FloorLayer() = default; +}; + +class 
FullyConnectedLayer : public LayerWithParameters<FullyConnectedDescriptor> +{ +public: + std::unique_ptr<ScopedCpuTensorHandle> m_Weight; + std::unique_ptr<ScopedCpuTensorHandle> m_Bias; + + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + FullyConnectedLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name); + ~FullyConnectedLayer() = default; +}; + +class InputLayer : public BindableLayer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + InputLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + InputLayer(LayerBindingId id, const char* name); + ~InputLayer() = default; +}; + +class MergerLayer : public LayerWithParameters<OriginsDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override; + + MergerLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + MergerLayer(const OriginsDescriptor& param, const char* name); + ~MergerLayer() = default; +}; + +class MultiplicationLayer : public Layer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + MultiplicationLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + MultiplicationLayer(const char* name); + ~MultiplicationLayer() = default; +}; + +class NormalizationLayer : public LayerWithParameters<NormalizationDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + NormalizationLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + NormalizationLayer(const NormalizationDescriptor& param, const char* name); + ~NormalizationLayer() = default; +}; + +class OutputLayer : public BindableLayer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override + { + boost::ignore_unused(graph, factory); + } + + OutputLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + OutputLayer(LayerBindingId id, const char* name); + ~OutputLayer() = default; +}; + +class PermuteLayer : public LayerWithParameters<PermuteDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + PermuteLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + + const PermutationVector& GetPermutation() const + { + return m_Param.m_DimMappings; + } + + bool IsInverse(const Layer& other) const + { + return (other.GetType() == LayerType::Permute) && + GetPermutation().IsInverse(boost::polymorphic_downcast<const PermuteLayer*>(&other)->GetPermutation()); + } + + bool IsEqual(const Layer& other) const + { + return (other.GetType() == LayerType::Permute) && + 
GetPermutation().IsEqual(boost::polymorphic_downcast<const PermuteLayer*>(&other)->GetPermutation()); + } + +protected: + PermuteLayer(const PermuteDescriptor& param, const char* name); + ~PermuteLayer() = default; +}; + +class Pooling2dLayer : public LayerWithParameters<Pooling2dDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + Pooling2dLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + Pooling2dLayer(const Pooling2dDescriptor& param, const char* name); + ~Pooling2dLayer() = default; +}; + +class SoftmaxLayer : public LayerWithParameters<SoftmaxDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + SoftmaxLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + SoftmaxLayer(const SoftmaxDescriptor& param, const char* name); + ~SoftmaxLayer() = default; +}; + +class SplitterLayer : public LayerWithParameters<ViewsDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override; + + SplitterLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + SplitterLayer(const ViewsDescriptor& param, const char* name); + ~SplitterLayer() = default; +}; + +class MemCopyLayer : public Layer +{ +public: + virtual std::unique_ptr<IWorkload> + CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const override; + + MemCopyLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + MemCopyLayer(const char* name); + ~MemCopyLayer() = default; +}; + +class ResizeBilinearLayer : public LayerWithParameters<ResizeBilinearDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> + CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const override; + + ResizeBilinearLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + ResizeBilinearLayer(const ResizeBilinearDescriptor& param, const char* name); + ~ResizeBilinearLayer() = default; +}; + +class L2NormalizationLayer : public Layer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + L2NormalizationLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + L2NormalizationLayer(const char* name); + ~L2NormalizationLayer() = default; +}; + +class ConstantLayer : public Layer +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + ConstantLayer* Clone(Graph& graph) const override; + + void ValidateTensorShapesFromInputs() override; + +protected: + ConstantLayer(const std::shared_ptr<ScopedCpuTensorHandle>& input, const char* name); + ~ConstantLayer() = default; + +private: + std::shared_ptr<ScopedCpuTensorHandle> m_LayerOutput; +}; + +class ReshapeLayer : public LayerWithParameters<ReshapeDescriptor> +{ +public: + virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + ReshapeLayer* Clone(Graph& graph) const override; 
+ + void ValidateTensorShapesFromInputs() override; + + bool IsEqual(const Layer& other) const + { + return (other.GetType() == LayerType::Reshape) && + m_Param.m_TargetShape == boost::polymorphic_downcast<const ReshapeLayer*>(&other)->m_Param.m_TargetShape; + } + +protected: + ReshapeLayer(const ReshapeDescriptor& desc, const char* name); + ~ReshapeLayer() = default; +}; + +} diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp new file mode 100644 index 0000000000..a77c723751 --- /dev/null +++ b/src/armnn/LayersFwd.hpp @@ -0,0 +1,59 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "InternalTypes.hpp" + +namespace armnn +{ + +template <LayerType Type> +struct LayerTypeOfImpl; + +template <LayerType Type> +using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type; + +template <typename T> +constexpr LayerType LayerEnumOf(const T* = nullptr); + +#define DECLARE_LAYER_IMPL(_, LayerName) \ + class LayerName##Layer; \ + template <> \ + struct LayerTypeOfImpl<LayerType::_##LayerName> \ + { \ + using Type = LayerName##Layer; \ + }; \ + template <> \ + constexpr LayerType LayerEnumOf(const LayerName##Layer*) \ + { \ + return LayerType::_##LayerName; \ + } + +#define DECLARE_LAYER(LayerName) DECLARE_LAYER_IMPL(, LayerName) + +DECLARE_LAYER(Activation) +DECLARE_LAYER(Addition) +DECLARE_LAYER(BatchNormalization) +DECLARE_LAYER(Constant) +DECLARE_LAYER(Convolution2d) +DECLARE_LAYER(DepthwiseConvolution2d) +DECLARE_LAYER(FakeQuantization) +DECLARE_LAYER(Floor) +DECLARE_LAYER(FullyConnected) +DECLARE_LAYER(Input) +DECLARE_LAYER(L2Normalization) +DECLARE_LAYER(MemCopy) +DECLARE_LAYER(Merger) +DECLARE_LAYER(Multiplication) +DECLARE_LAYER(Normalization) +DECLARE_LAYER(Output) +DECLARE_LAYER(Permute) +DECLARE_LAYER(Pooling2d) +DECLARE_LAYER(Reshape) +DECLARE_LAYER(ResizeBilinear) +DECLARE_LAYER(Softmax) +DECLARE_LAYER(Splitter) + +} diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp new file mode 100644 index 0000000000..14712d209c --- /dev/null +++ b/src/armnn/LoadedNetwork.cpp @@ -0,0 +1,424 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "LoadedNetwork.hpp" +#include "Layer.hpp" +#include "Layers.hpp" +#include "Graph.hpp" +#include "Network.hpp" +#include "Runtime.hpp" +#include "Profiling.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include <arm_compute/core/CL/OpenCL.h> +#endif + +#include <backends/CpuTensorHandle.hpp> + +#include <boost/polymorphic_cast.hpp> +#include <boost/assert.hpp> +#include <boost/format.hpp> +#include <boost/log/trivial.hpp> + +namespace armnn +{ + +using namespace std; + +std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net, + const WorkloadFactories& workloadFactories) +{ + std::unique_ptr<LoadedNetwork> loadedNetwork; + + try + { + loadedNetwork.reset(new LoadedNetwork(std::move(net), workloadFactories)); + } + catch (const std::runtime_error& error) + { + BOOST_LOG_TRIVIAL(error) << "An error occurred when preparing the network workloads: " << error.what(); + return std::unique_ptr<LoadedNetwork>(); + } + catch (const armnn::Exception& error) + { + BOOST_LOG_TRIVIAL(error) << "An error occurred when preparing the network workloads: " << error.what(); + return std::unique_ptr<LoadedNetwork>(); + } +#if ARMCOMPUTECL_ENABLED + catch (const cl::Error& error) + { + BOOST_LOG_TRIVIAL(error) << "A CL error occurred attempting to prepare a network workload: " + << error.what() << ". CL error code is: " << error.err(); + return std::unique_ptr<LoadedNetwork>(); + } +#endif + + return loadedNetwork; +} + +LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const WorkloadFactories& workloadFactories) +: m_OptimizedNetwork(std::move(net)) +{ + Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort(); + //first create tensor handlers + //handlers are created before workloads are + //because workload creation can modify some of the handlers + //(for example the splitter and merger layers) + for (auto&& layer : order) + { + layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), *GetWorkloadFactory(*layer, workloadFactories)); + } + + //then create workloads + for (auto&& layer : order) + { + const shared_ptr<IWorkloadFactory> workloadFactory = GetWorkloadFactory(*layer, workloadFactories); + + switch (layer->GetType()) + { + case LayerType::Input: + case LayerType::Output: + { + // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput() + break; + } + default: + { + auto workload = layer->CreateWorkload(m_OptimizedNetwork->GetGraph(), *workloadFactory); + + if (!workload) + { + const char* const layerName = layer->GetNameStr().length() != 0 ? 
layer->GetName() : "<Unnamed>"; + throw InvalidArgumentException(boost::str( + boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')") + % layerName % static_cast<int>(layer->GetType()) % layer->GetComputeDevice() + )); + } + + m_WorkloadQueue.push_back(move(workload)); + break; + } + } + } + + // set up memory + m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers(); +} + +TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const +{ + for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers()) + { + BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot"); + if (inputLayer->GetBindingId() == layerId) + { + return inputLayer->GetOutputSlot(0).GetTensorInfo(); + } + } + + throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId)); +} + +TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const +{ + for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers()) + { + BOOST_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot"); + BOOST_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected"); + if (outputLayer->GetBindingId() == layerId) + { + return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo(); + } + } + + throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId)); +} + +const shared_ptr<IWorkloadFactory> LoadedNetwork::GetWorkloadFactory(const Layer& layer, + const WorkloadFactories& workloadFactories) const +{ + shared_ptr<IWorkloadFactory> workloadFactory; + + switch (layer.GetComputeDevice()) + { + case Compute::CpuAcc: + { + workloadFactory = workloadFactories.m_CpuAcc; + break; + } + case Compute::GpuAcc: + { + workloadFactory = workloadFactories.m_GpuAcc; + break; + } + case Compute::CpuRef: + default: + { + workloadFactory = workloadFactories.m_CpuRef; + break; + } + } + + BOOST_ASSERT_MSG(workloadFactory, "No workload factory"); + + std::string reasonIfUnsupported; + BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported), + "Factory does not support layer"); + boost::ignore_unused(reasonIfUnsupported); + + return workloadFactory; +} + +namespace { + +// Non-copyable class owning accelerator-specific tensor data. +class TensorPin +{ +public: + TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id) + : m_TensorHandle(std::move(handle)) + , m_TensorInfo(info) + , m_Id(id) + { + } + + ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); } + const TensorInfo& GetTensorInfo() const { return m_TensorInfo; } + LayerBindingId GetBindingId() const { return m_Id; } + +private: + std::unique_ptr<ITensorHandle> m_TensorHandle; + TensorInfo m_TensorInfo; + LayerBindingId m_Id; +}; + +static const TensorPin& GetTensorPin(LayerBindingId id, + const std::vector<TensorPin>& pins, + char const* bindingPointDesc) +{ + auto it = std::find_if(pins.begin(), pins.end(), + [id](const TensorPin& pin) + { + return pin.GetBindingId() == id; + }); + + if (it != pins.end()) + { + return *it; + } + else + { + throw InvalidArgumentException(boost::str( + boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id)); + } +} + +// Stores data that needs to be kept accessible for the entire execution of a workload. 
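+// Each user-supplied input/output buffer is wrapped in a (Const)PassthroughCpuTensorHandle and stored as a
+// TensorPin together with its TensorInfo and LayerBindingId, so EnqueueWorkload() can look the buffers up
+// by binding id when it enqueues the input and output workloads.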
+class WorkloadData +{ +public: + WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors) + { + m_InputTensorPins.reserve(inputTensors.size()); + m_OutputTensorPins.reserve(outputTensors.size()); + + for (auto inputTensorPair : inputTensors) + { + auto inputTensor = inputTensorPair.second; + + std::unique_ptr<ITensorHandle> tensorHandle = + std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea()); + LayerBindingId layerId = inputTensorPair.first; + + m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId); + } + + for (auto outputTensorPair : outputTensors) + { + auto outputTensor = outputTensorPair.second; + + std::unique_ptr<ITensorHandle> tensorHandle = + std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea()); + LayerBindingId layerId = outputTensorPair.first; + + m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId); + } + } + + const TensorPin& GetInputTensorPin(LayerBindingId id) const + { + return GetTensorPin(id, m_InputTensorPins, "input"); + } + + const TensorPin& GetOutputTensorPin(LayerBindingId id) const + { + return GetTensorPin(id, m_OutputTensorPins, "output"); + } + +private: + + std::vector<TensorPin> m_InputTensorPins; + std::vector<TensorPin> m_OutputTensorPins; +}; + +} + +Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, + const OutputTensors& outputTensors, + const WorkloadFactories& workloadFactories) +{ + ARMNN_UPDATE_PROFILING_EVENT_TAG(); + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload"); + + const Graph& graph = m_OptimizedNetwork->GetGraph(); + + // Walk graph to determine the order of execution + if (graph.GetNumLayers() < 2) + { + BOOST_LOG_TRIVIAL(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph"; + return Status::Failure; + } + + // Data that must be kept alive for the entire execution of the workload + WorkloadData workloadData(inputTensors, outputTensors); + + if (graph.GetNumInputs() != inputTensors.size()) + { + throw InvalidArgumentException("Number of inputs provided does not match network."); + } + + // for each input to the network, call EnqueueInput with the data passed by the user + for (const BindableLayer* inputLayer : graph.GetInputLayers()) + { + const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId()); + EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo(), workloadFactories); + } + + // for each output to the network, call EnqueueOutput with the data passed by the user + for (const BindableLayer* outputLayer : graph.GetOutputLayers()) + { + const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId()); + EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo(), workloadFactories); + } + + bool executionSucceeded = true; + + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute"); + executionSucceeded = Execute(); + } + + // Hack: get rid of inputs and outputs we added + TidyWorkloadQueue(graph.GetNumInputs(), graph.GetNumOutputs()); + + return executionSucceeded ? 
Status::Success : Status::Failure; +} + +void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo, + const WorkloadFactories& workloadFactories) +{ + if (layer.GetType() != LayerType::Input) + { + throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer"); + } + + if (tensorHandle == nullptr) + { + throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL"); + } + + InputQueueDescriptor inputQueueDescriptor; + WorkloadInfo info; + + inputQueueDescriptor.m_Inputs.push_back(tensorHandle); + info.m_InputTensorInfos.push_back(tensorInfo); + + BOOST_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output"); + const OutputHandler& handler = layer.GetOutputHandler(); + const TensorInfo& outputTensorInfo = handler.GetTensorInfo(); + ITensorHandle* outputTensorHandle = handler.GetData(); + BOOST_ASSERT_MSG(outputTensorHandle != nullptr, + "Data should have been allocated."); + inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle); + info.m_OutputTensorInfos.push_back(outputTensorInfo); + + shared_ptr<IWorkloadFactory> workloadFactory = GetWorkloadFactory(layer, workloadFactories); + auto inputWorkload = workloadFactory->CreateInput(inputQueueDescriptor, info); + BOOST_ASSERT_MSG(inputWorkload, "No input workload created"); + m_WorkloadQueue.insert(m_WorkloadQueue.begin(), move(inputWorkload)); +} + +void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, + const TensorInfo& tensorInfo, const WorkloadFactories& workloadFactories) +{ + if (layer.GetType() != LayerType::Output) + { + throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer"); + } + + if (tensorHandle == nullptr) + { + throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL"); + } + + OutputQueueDescriptor outputQueueDescriptor; + WorkloadInfo info; + + outputQueueDescriptor.m_Outputs.push_back(tensorHandle); + info.m_OutputTensorInfos.push_back(tensorInfo); + + BOOST_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input."); + + // Get the output handler from the previous node + const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler(); + + const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo(); + ITensorHandle* inputTensorHandle = outputHandler.GetData(); + BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); + + outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle); + info.m_InputTensorInfos.push_back(inputTensorInfo); + + shared_ptr<IWorkloadFactory> workloadFactory = GetWorkloadFactory(layer, workloadFactories); + auto outputWorkload = workloadFactory->CreateOutput(outputQueueDescriptor, info); + BOOST_ASSERT_MSG(outputWorkload, "No output workload created"); + m_WorkloadQueue.push_back(move(outputWorkload)); +} + +bool LoadedNetwork::Execute() +{ + bool success = true; + + try + { + for (size_t i = 0; i < m_WorkloadQueue.size(); ++i) + { + m_WorkloadQueue[i]->Execute(); + } + } +#if ARMCOMPUTECL_ENABLED + catch (const cl::Error& error) + { + BOOST_LOG_TRIVIAL(error) << "A CL error occurred attempting to execute a workload: " + << error.what() << ". 
CL error code is: " << error.err(); + success = false; + } +#endif + catch (const std::runtime_error& error) + { + BOOST_LOG_TRIVIAL(error) << "An error occurred attempting to execute a workload: " << error.what(); + success = false; + } + + return success; +} + +void LoadedNetwork::TidyWorkloadQueue(size_t numInputs, size_t numOutputs) +{ + m_WorkloadQueue.erase(m_WorkloadQueue.begin(), m_WorkloadQueue.begin() + boost::numeric_cast<long>(numInputs)); + m_WorkloadQueue.erase(m_WorkloadQueue.end() - boost::numeric_cast<long>(numOutputs), m_WorkloadQueue.end()); +} + +} diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp new file mode 100644 index 0000000000..d6af11e779 --- /dev/null +++ b/src/armnn/LoadedNetwork.hpp @@ -0,0 +1,59 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "armnn/Tensor.hpp" +#include "armnn/Types.hpp" +#include "Network.hpp" +#include "LayerFwd.hpp" +#include "backends/Workload.hpp" +#include "backends/WorkloadFactory.hpp" + +namespace cl +{ + class Context; + class CommandQueue; + class Device; +} + +namespace armnn +{ + +struct WorkloadFactories; + +class LoadedNetwork +{ +public: + TensorInfo GetInputTensorInfo(LayerBindingId layerId) const; + TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const; + + Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors, + const WorkloadFactories& workloadFactories); + + static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net, + const WorkloadFactories& workloadFactories); + +private: + LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const WorkloadFactories& workloadFactories); + + void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo, + const WorkloadFactories& workloadFactories); + + void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, + const TensorInfo& tensorInfo, const WorkloadFactories& workloadFactories); + + bool Execute(); + + void TidyWorkloadQueue(size_t numInputs, size_t numOutputs); + + const std::shared_ptr<IWorkloadFactory> GetWorkloadFactory(const Layer& layer, + const WorkloadFactories& workloadFactories) const; + + std::unique_ptr<OptimizedNetwork> m_OptimizedNetwork; + + std::vector< std::unique_ptr<IWorkload> > m_WorkloadQueue; +}; + +} diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp new file mode 100644 index 0000000000..4ee68b3c48 --- /dev/null +++ b/src/armnn/Network.cpp @@ -0,0 +1,335 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "Network.hpp" +#include "Graph.hpp" +#include "Layer.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" +#include "Layers.hpp" +#include "Optimizer.hpp" + +#include <armnn/Utils.hpp> + +#include <fcntl.h> +#include <algorithm> +#include <fstream> +#include <memory> + +#include <boost/assert.hpp> +#include <boost/format.hpp> +#include <boost/log/trivial.hpp> +#include <boost/numeric/conversion/converter_policies.hpp> +#include <boost/cast.hpp> + +namespace armnn +{ + +armnn::INetwork* INetwork::CreateRaw() +{ + return new Network(); +} + +armnn::INetworkPtr INetwork::Create() +{ + return INetworkPtr(CreateRaw(), &INetwork::Destroy); +} + +void INetwork::Destroy(INetwork* network) +{ + delete boost::polymorphic_downcast<Network*>(network); +} + +Status Network::PrintGraph() +{ + m_Graph->Print(); + return Status::Success; +} + +void IOptimizedNetwork::Destroy(IOptimizedNetwork* network) +{ + delete boost::polymorphic_downcast<OptimizedNetwork*>(network); +} + +Status OptimizedNetwork::PrintGraph() +{ + m_Graph->Print(); + return Status::Success; +} + +IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const DeviceSpec& deviceSpec) +{ + const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); + std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); + + OptimizedNetwork* optNet = new OptimizedNetwork(std::move(graph)); + + Optimizer::Get().Optimize(optNet->GetGraph()); + + // Infer the tensor infos for all output slots. Throws an exception on failure. + optNet->GetGraph().InferTensorInfos(); + + // Assign a compute device for all nodes + for (auto&& layer : optNet->GetGraph()) + { + DataType dataType = layer->GetDataType(); + + // Default to the user-requested compute device from the Runtime + layer->SetComputeDevice(deviceSpec.DefaultComputeDevice); + + // If the layer is unsupported by this device, fall back to reference + std::string reasonIfUnsupported; + if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported)) + { + BOOST_LOG_TRIVIAL(warning) << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << + " is not supported on requested backend " << layer->GetComputeDevice() << " (reason: " << + reasonIfUnsupported << "), falling back to CpuRef backend."; + layer->SetComputeDevice(Compute::CpuRef); + } + + BOOST_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported), + "Layer has no valid compute device"); + } + + optNet->GetGraph().AddCopyLayers(); + + return {optNet, &IOptimizedNetwork::Destroy}; +} + +Network::Network() +: m_Graph(std::make_unique<Graph>()) +{ +} + +Network::~Network() +{ +} + +IConnectableLayer* Network::AddInputLayer(LayerBindingId id, const char* name) +{ + return m_Graph->AddLayer<InputLayer>(id, name); +} + +IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) +{ + if (fullyConnectedDescriptor.m_BiasEnabled && (biases == nullptr)) + { + throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be NULL"); + } + + const auto layer = m_Graph->AddLayer<FullyConnectedLayer>(fullyConnectedDescriptor, name); + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights); + + if (fullyConnectedDescriptor.m_BiasEnabled) + { + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases); + } + + return layer; +} + +IConnectableLayer* 
Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const ConstTensor& weights, + const char* name) +{ + return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, nullptr, name); +} + +IConnectableLayer* Network::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) +{ + return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, &biases, name); +} + +IConnectableLayer* Network::AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) +{ + if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr)) + { + throw InvalidArgumentException("AddConvolution2dLayer: biases cannot be NULL"); + } + + const auto layer = m_Graph->AddLayer<Convolution2dLayer>(convolution2dDescriptor, name); + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights); + + if (convolution2dDescriptor.m_BiasEnabled) + { + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases); + } + + return layer; +} + +IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const char* name) +{ + return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name); +} +IConnectableLayer* Network::AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) +{ + return AddConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name); +} + +IConnectableLayer* Network::AddDepthwiseConvolution2dLayerImpl( + const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor* biases, + const char* name) +{ + if (convolution2dDescriptor.m_BiasEnabled && (biases == nullptr)) + { + throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL"); + } + + const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name); + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights); + + if (convolution2dDescriptor.m_BiasEnabled) + { + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(*biases); + } + + return layer; +} + +IConnectableLayer* Network::AddDepthwiseConvolution2dLayer( + const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const char* name) +{ + return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, nullptr, name); +} +IConnectableLayer* Network::AddDepthwiseConvolution2dLayer( + const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor& biases, + const char* name) +{ + return AddDepthwiseConvolution2dLayerImpl(convolution2dDescriptor, weights, &biases, name); +} + +IConnectableLayer* Network::AddPermuteLayer(const PermuteDescriptor& permuteDescriptor, + const char* name) +{ + return m_Graph->AddLayer<PermuteLayer>(permuteDescriptor, name); +} + +IConnectableLayer* Network::AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor, + const char* name) +{ + return m_Graph->AddLayer<Pooling2dLayer>(pooling2dDescriptor, name); +} + +IConnectableLayer* Network::AddActivationLayer(const ActivationDescriptor& activationDescriptor, + const char* name) +{ + return 
m_Graph->AddLayer<ActivationLayer>(activationDescriptor, name); +} + +IConnectableLayer* Network::AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, + const char* name) +{ + return m_Graph->AddLayer<NormalizationLayer>(normalizationDescriptor, name); +} + +IConnectableLayer* Network::AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor, + const char* name) +{ + return m_Graph->AddLayer<SoftmaxLayer>(softmaxDescriptor, name); +} + +IConnectableLayer* Network::AddSplitterLayer(const ViewsDescriptor& splitterDescriptor, + const char* name) +{ + return m_Graph->AddLayer<SplitterLayer>(splitterDescriptor, name); +} + +IConnectableLayer* Network::AddMergerLayer(const OriginsDescriptor& mergerDescriptor, + const char* name) +{ + return m_Graph->AddLayer<MergerLayer>(mergerDescriptor, name); +} + +IConnectableLayer* Network::AddAdditionLayer(const char* name) +{ + return m_Graph->AddLayer<AdditionLayer>(name); +} + +IConnectableLayer* Network::AddMultiplicationLayer(const char* name) +{ + return m_Graph->AddLayer<MultiplicationLayer>(name); +} + +IConnectableLayer* Network::AddOutputLayer(LayerBindingId id, const char* name) +{ + return m_Graph->AddLayer<OutputLayer>(id, name); +} + +IConnectableLayer* Network::AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc, + const ConstTensor& mean, + const ConstTensor& variance, + const ConstTensor& beta, + const ConstTensor& gamma, + const char* name) +{ + const auto layer = m_Graph->AddLayer<BatchNormalizationLayer>(desc, name); + + layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(mean); + layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(variance); + layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta); + layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(gamma); + + return layer; +} + +IConnectableLayer* Network::AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDescriptor, const char* name) +{ + return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor,name); +} + +IConnectableLayer* Network::AddL2NormalizationLayer(const char* name) +{ + return m_Graph->AddLayer<L2NormalizationLayer>(name); +} + +IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name) +{ + return m_Graph->AddLayer<ConstantLayer>(std::make_shared<ScopedCpuTensorHandle>(input), name); +} + +IConnectableLayer* Network::AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, const char* name) +{ + return m_Graph->AddLayer<ReshapeLayer>(reshapeDescriptor, name); +} + +IConnectableLayer* Network::AddFloorLayer(const char* name) +{ + return m_Graph->AddLayer<FloorLayer>(name); +} + +OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph) + : m_Graph(std::move(graph)) +{ +} + +OptimizedNetwork::~OptimizedNetwork() +{ +} + +} // namespace armnn + diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp new file mode 100644 index 0000000000..de0c1ecf2f --- /dev/null +++ b/src/armnn/Network.hpp @@ -0,0 +1,145 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/TensorFwd.hpp> +#include <armnn/Types.hpp> + +#include <armnn/INetwork.hpp> + +#include <string> +#include <vector> +#include <memory> + +#include "Layer.hpp" + +namespace armnn +{ +class Graph; + +/// Private implementation of INetwork +class Network final : public INetwork +{ +public: + Network(); + ~Network(); + + const Graph& GetGraph() const { return *m_Graph; } + + Status PrintGraph() override; + + IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name=nullptr) override; + + IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const char* name = nullptr) override; + + IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor& biases, + const char* name = nullptr) override; + + IConnectableLayer* AddDepthwiseConvolution2dLayer( + const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const char* name = nullptr) override; + + IConnectableLayer* AddDepthwiseConvolution2dLayer( + const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor& biases, + const char* name = nullptr) override; + + IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const ConstTensor& weights, + const char* name = nullptr) override; + + IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const ConstTensor& weights, + const ConstTensor& biases, + const char* name = nullptr) override; + + IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddPooling2dLayer(const Pooling2dDescriptor& pooling2dDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddActivationLayer(const ActivationDescriptor& activationDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddNormalizationLayer(const NormalizationDescriptor& normalizationDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddSoftmaxLayer(const SoftmaxDescriptor& softmaxDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddSplitterLayer(const ViewsDescriptor& splitterDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddMergerLayer(const OriginsDescriptor& mergerDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddAdditionLayer(const char* name = nullptr) override; + + IConnectableLayer* AddMultiplicationLayer(const char* name = nullptr) override; + + IConnectableLayer* AddBatchNormalizationLayer(const BatchNormalizationDescriptor& desc, + const ConstTensor& mean, + const ConstTensor& variance, + const ConstTensor& beta, + const ConstTensor& gamma, + const char* name = nullptr) override; + + IConnectableLayer* AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDesc, + const char* name = nullptr) override; + + IConnectableLayer* AddL2NormalizationLayer(const char* name = nullptr) override; + + IConnectableLayer* AddConstantLayer(const ConstTensor& input, const char* name = nullptr) override; + + IConnectableLayer* AddReshapeLayer(const ReshapeDescriptor& reshapeDescriptor, + const char* name = nullptr) override; + + IConnectableLayer* AddFloorLayer(const char* name = nullptr) override; + + 
IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr) override; + +private: + IConnectableLayer* AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const ConstTensor& weights, + const ConstTensor* biases, + const char* name); + + IConnectableLayer* AddConvolution2dLayerImpl(const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor* biases, + const char* name); + + IConnectableLayer* AddDepthwiseConvolution2dLayerImpl( + const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const ConstTensor* biases, + const char* name); + + std::unique_ptr<Graph> m_Graph; +}; + +class OptimizedNetwork final : public IOptimizedNetwork +{ +public: + OptimizedNetwork(std::unique_ptr<Graph> graph); + ~OptimizedNetwork(); + + Status PrintGraph() override; + + Graph& GetGraph() { return *m_Graph; } + +private: + std::unique_ptr<Graph> m_Graph; +}; + +} // namespace armnn diff --git a/src/armnn/Optimizer.cpp b/src/armnn/Optimizer.cpp new file mode 100644 index 0000000000..85b9f2803c --- /dev/null +++ b/src/armnn/Optimizer.cpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "Optimizer.hpp" +#include "optimizations/All.hpp" + +namespace armnn +{ + +const Optimizer& Optimizer::Get() +{ + // Add optimizations here + static optimizations::SquashEqualPermuteSiblings squashEqualPermuteSiblings; + static optimizations::SquashEqualReshapeSiblings squashEqualReshapeSiblings; + static optimizations::OptimizeInversePermutes optimizeInversePermutes; + static optimizations::MovePermuteUp movePermuteUp; + static optimizations::PermuteAsReshape permuteAsReshape; + static optimizations::OptimizeConsecutiveReshapes optimizeConsecutiveReshapes; + + // Set optimizations in desired order + static const Optimizer optimizer({ + &squashEqualPermuteSiblings, + &squashEqualReshapeSiblings, + &optimizeInversePermutes, + &movePermuteUp, + &permuteAsReshape, + &optimizeConsecutiveReshapes, + }); + + return optimizer; +} + +void Optimizer::Optimize(Graph& graph) const +{ + auto it = graph.TopologicalSort().end(); + // Call TopologicalSort() in every iteration to re-order the list in case layers where added/removed. + while (it != graph.TopologicalSort().begin()) + { + --it; + for (auto&& optimization : m_Optimizations) + { + optimization->Run(graph, it); + + if ((*it)->IsOutputUnconnected()) + { + it = graph.EraseLayer(it); + break; + } + } + } +} + + +} // namespace armnn diff --git a/src/armnn/Optimizer.hpp b/src/armnn/Optimizer.hpp new file mode 100644 index 0000000000..262f264c28 --- /dev/null +++ b/src/armnn/Optimizer.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <vector> + +namespace armnn +{ + +class Graph; +class Optimization; + +class Optimizer +{ +public: + static const Optimizer& Get(); + + void Optimize(Graph& graph) const; + +private: + ~Optimizer() = default; + + Optimizer(std::initializer_list<Optimization*> optimizations) : m_Optimizations(optimizations) {} + + std::vector<Optimization*> m_Optimizations; +}; + +} // namespace armnn diff --git a/src/armnn/Profiling.cpp b/src/armnn/Profiling.cpp new file mode 100644 index 0000000000..15a195e6bd --- /dev/null +++ b/src/armnn/Profiling.cpp @@ -0,0 +1,293 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "Profiling.hpp" + +#if ARMNN_PROFILING_ENABLED + +#if ARMNN_STREAMLINE_ENABLED +#include <streamline_annotate.h> +#endif + +#if ARMCOMPUTECL_ENABLED +#include <arm_compute/runtime/CL/CLFunctions.h> +#endif + +#include <algorithm> +#include <iomanip> +#include <iostream> +#include <map> +#include <stack> +#include <boost/algorithm/string.hpp> + +namespace armnn +{ + +// Controls the amount of memory initially allocated to store profiling events. +// If chosen carefully, the profiling system will not make any additional allocations, thus minimizing its impact on +// measured times. +constexpr std::size_t g_ProfilingEventCountHint = 1024; + +// Whether profiling reports should include the sequence of events together with their timings. +constexpr bool g_WriteProfilingEventSequence = true; + +// Whether profiling reports should also report detailed information on events grouped by tag. +// This is used to group stats per inference (see usage of ARMNN_UPDATE_PROFILING_EVENT_TAG in +// Runtime::EnqueueWorkload). This can spam the output stream, so use carefully (or adapt +// the code to just output information for a tag of interest). +constexpr bool g_AggregateProfilingEventsByTag = false; + +// Whether a call to Profiler::AnalyzeEventsAndWriteResults() will be made when the Profiler +// singleton is destroyed. It can be convenient for local tests. +constexpr bool g_WriteReportToStdOutOnProfilerDestruction = true; + +// Whether events denoting operations running on the GPU should force a sync before/after the event. +// This is hardcoded to true for now as the profiling timings are not very useful without it. +constexpr bool g_ProfilingForceGpuSync = true; + +std::map<std::string, Profiler::ProfilingEventStats> Profiler::CalculateProfilingEventStats() const +{ + std::map<std::string, ProfilingEventStats> nameToStatsMap; + + for (auto&& event : m_EventSequence) + { + auto mapIter = nameToStatsMap.find(event.m_Label); + if (mapIter != nameToStatsMap.end()) + { + ProfilingEventStats& stats = mapIter->second; + stats.m_TotalMs += event.DurationMs(); + stats.m_MinMs = std::min(stats.m_MinMs, event.DurationMs()); + stats.m_MaxMs = std::max(stats.m_MaxMs, event.DurationMs()); + ++stats.m_Count; + } + else + { + ProfilingEventStats stats; + stats.m_TotalMs = event.DurationMs(); + stats.m_MinMs = event.DurationMs(); + stats.m_MaxMs = event.DurationMs(); + stats.m_Count = 1; + + nameToStatsMap[event.m_Label] = stats; + } + } + + return nameToStatsMap; +} + +void Profiler::AnalyzeEventSequenceAndWriteResults(std::vector<ProfilingEvent>::const_iterator first, + std::vector<ProfilingEvent>::const_iterator last, + std::ostream& outStream) const +{ + // Output event sequence, if needed + if (g_WriteProfilingEventSequence) + { + // Make sure timestamps are output with 6 decimals, and save old settings + std::streamsize oldPrecision = outStream.precision(); + outStream.precision(6); + std::ios_base::fmtflags oldFlags = outStream.flags(); + outStream.setf(std::ios::fixed); + // Output fields + outStream << "Event Sequence - Name | Duration (ms) | Start (ms) | Stop (ms) | Device" << std::endl; + for (auto event = first; event != last; ++event) + { + std::chrono::duration<double, std::milli> startTimeMs = event->m_StartTime.time_since_epoch(); + std::chrono::duration<double, std::milli> stopTimeMs = event->m_StopTime.time_since_epoch(); + + outStream << std::setw(50) << event->m_Label << " " + << std::setw(20) 
<< event->DurationMs() + << std::setw(20) << startTimeMs.count() + << std::setw(20) << stopTimeMs.count() + << std::setw(20) << Profiler::Get().GetEventComputeDevice(event->m_Device) + << std::endl; + } + outStream << std::endl; + // Restore previous precision settings + outStream.flags(oldFlags); + outStream.precision(oldPrecision); + } + + // Aggregate results per event name + std::map<std::string, ProfilingEventStats> nameToStatsMap = CalculateProfilingEventStats(); + + // Output aggregated stats + outStream << "Event Stats - Name | Avg (ms) | Min (ms) | Max (ms) | Total (ms) | Count" << std::endl; + for (const auto& pair : nameToStatsMap) + { + const std::string& eventLabel = pair.first; + const ProfilingEventStats& eventStats = pair.second; + const double avgMs = eventStats.m_TotalMs / double(eventStats.m_Count); + + outStream << "\t" << std::setw(50) << eventLabel << " " << std::setw(9) << avgMs << " " + << std::setw(9) << eventStats.m_MinMs << " " << std::setw(9) << eventStats.m_MaxMs << " " + << std::setw(9) << eventStats.m_TotalMs << " " << std::setw(9) << eventStats.m_Count << std::endl; + } + outStream << std::endl; +} + +Profiler Profiler::s_Instance; + +Profiler::Profiler() + : m_EventTag(0) + , m_NestingLevel(0) + , m_EventTagUpdated(false) +{ + m_EventSequence.reserve(g_ProfilingEventCountHint); + +#if ARMNN_STREAMLINE_ENABLED + // Initialise streamline annotations + ANNOTATE_SETUP; +#endif +} + +Profiler::~Profiler() +{ + if (g_WriteReportToStdOutOnProfilerDestruction) + { + AnalyzeEventsAndWriteResults(std::cout); + } +} + +void Profiler::BeginEvent(Compute compute, const std::string label) +{ + // We need to sync just before the begin event to not include time before the period we want to time. + WaitForDevice(compute); + + const TimePoint timeStamp = Clock::now(); + m_ObservedMarkers.emplace(Marker{m_EventSequence.size(), label, timeStamp, compute, m_EventTag}); + m_EventSequence.emplace_back(); + +#if ARMNN_STREAMLINE_ENABLED + ANNOTATE_CHANNEL_COLOR(m_NestingLevel, GetEventColor(compute), label.c_str()); +#endif + + m_NestingLevel++; +} + +void Profiler::EndEvent(Compute compute) +{ + // We need to sync just before the end event to include all the time of the timed period. + WaitForDevice(compute); + + const Marker& marker = m_ObservedMarkers.top(); + + const TimePoint startTime = marker.m_TimeStamp; + const TimePoint stopTime = Clock::now(); + + m_EventSequence[marker.m_Id] = {std::move(marker.m_EventName), + startTime, + stopTime, + marker.m_ComputeDevice, + marker.m_Tag}; + + m_ObservedMarkers.pop(); + +#if ARMNN_STREAMLINE_ENABLED + ANNOTATE_CHANNEL_END(m_NestingLevel); +#endif + + m_NestingLevel--; +} + +void Profiler::AnalyzeEventsAndWriteResults(std::ostream& outStream) const +{ + // Stack should be empty now. + const bool saneMarkerSequence = m_ObservedMarkers.empty(); + + // Abort if the sequence of markers was found to have incorrect information: + // The stats cannot be trusted. + if (!saneMarkerSequence) + { + outStream << "Cannot write profiling stats. " + "Unexpected errors were found when analyzing the sequence of logged events, which may lead to plainly " + "wrong stats. The profiling system may contain implementation issues or could have been used in an " + "unsafe manner." 
<< std::endl; + return; + } + + // Analyze the full sequence of events + AnalyzeEventSequenceAndWriteResults(m_EventSequence.begin(), m_EventSequence.end(), outStream); + + // Aggregate events by tag if requested (spams the output stream if done for all tags) + if (m_EventTagUpdated && g_AggregateProfilingEventsByTag) + { + outStream << std::endl; + outStream << "***" << std::endl; + outStream << "*** Per Tag Stats" << std::endl; + outStream << "***" << std::endl; + outStream << std::endl; + + for (auto iter = m_EventSequence.begin(); iter != m_EventSequence.end();) + { + const uint32_t tag = iter->m_Tag; + + // Advance iter until we find the first non-matching tag + auto tagEndIter = iter; + for (; tagEndIter != m_EventSequence.end(); ++tagEndIter) + { + if (tagEndIter->m_Tag != tag) + { + break; + } + } + + outStream << "> Begin Tag: " << tag << std::endl; + outStream << std::endl; + AnalyzeEventSequenceAndWriteResults(iter, tagEndIter, outStream); + outStream << std::endl; + outStream << "> End Tag: " << tag << std::endl; + + iter = tagEndIter; + } + } +} + +void Profiler::WaitForDevice(Compute compute) const +{ +#if ARMCOMPUTECL_ENABLED + if(compute == Compute::GpuAcc && g_ProfilingForceGpuSync) + { + arm_compute::CLScheduler::get().sync(); + } +#endif +} + +const char* Profiler::GetEventComputeDevice(Compute compute) const +{ + switch(compute) + { + case Compute::CpuRef: + return "CpuRef"; + case Compute::CpuAcc: + return "CpuAcc"; + case Compute::GpuAcc: + return "GpuAcc"; + default: + return "Undefined"; + } +} + +std::uint32_t Profiler::GetEventColor(Compute compute) const +{ + switch(compute) + { + case Compute::CpuRef: + // Cyan + return 0xffff001b; + case Compute::CpuAcc: + // Green + return 0x00ff001b; + case Compute::GpuAcc: + // Purple + return 0xff007f1b; + default: + // Dark gray + return 0x5555551b; + } +} + +} // namespace armnn + +#endif // ARMNN_PROFILING_ENABLED + diff --git a/src/armnn/Profiling.hpp b/src/armnn/Profiling.hpp new file mode 100644 index 0000000000..88a7adff7c --- /dev/null +++ b/src/armnn/Profiling.hpp @@ -0,0 +1,159 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#if ARMNN_PROFILING_ENABLED + +#include "armnn/ArmNN.hpp" + +#include <chrono> +#include <iosfwd> +#include <ctime> +#include <vector> +#include <stack> +#include <map> + +namespace armnn +{ + +// Clock class that uses the same timestamp function as the Mali DDK +class monotonic_clock { +public: + using duration = std::chrono::nanoseconds; + using time_point = std::chrono::time_point<monotonic_clock, duration>; + + static std::chrono::time_point<monotonic_clock, std::chrono::nanoseconds> now() noexcept + { + timespec ts; +#if defined(CLOCK_MONOTONIC_RAW) + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); +#else + clock_gettime(CLOCK_MONOTONIC, &ts); +#endif + return time_point(std::chrono::nanoseconds(ts.tv_sec*1000000000 + ts.tv_nsec)); + } +}; + +// Simple single-threaded profiler. +// Tracks events reported by BeginEvent()/EndEvent() and outputs detailed information and stats when +// Profiler::AnalyzeEventsAndWriteResults() is called. +class Profiler +{ +public: + // Marks the beginning of a user-defined event. + // No attempt will be made to copy the name string: It must be known at compile time. + void BeginEvent(Compute compute, const std::string name); + + // Marks the end of a user-defined event. 
+ void EndEvent(Compute compute); + + // Increments the event tag, allowing grouping of events in a user-defined manner (e.g. per inference). + void UpdateEventTag() { ++m_EventTag; m_EventTagUpdated = true; } + + // Analyzes the tracked events and writes the results to the given output stream. + // Please refer to the configuration variables in Profiling.cpp to customize the information written. + void AnalyzeEventsAndWriteResults(std::ostream& outStream) const; + + // Accesses the singleton + static Profiler& Get() { return s_Instance; } + + // Gets a string name for a given Compute device enum + const char* GetEventComputeDevice(Compute compute) const; + + // Gets the color to render an event with, based on which device it denotes + std::uint32_t GetEventColor(Compute compute) const; + + typedef monotonic_clock Clock; + typedef std::chrono::time_point<Clock> TimePoint; + +private: + + struct Marker + { + std::size_t m_Id; + const std::string m_EventName; + TimePoint m_TimeStamp; + Compute m_ComputeDevice; + std::uint32_t m_Tag; + }; + + struct ProfilingEvent + { + std::string m_Label; + TimePoint m_StartTime; + TimePoint m_StopTime; + Compute m_Device; + std::uint32_t m_Tag; + + double DurationMs() const + { + return std::chrono::duration<double>(m_StopTime - m_StartTime).count()*1000.0; + } + }; + + struct ProfilingEventStats + { + double m_TotalMs; + double m_MinMs; + double m_MaxMs; + std::uint32_t m_Count; + }; + + Profiler(); + ~Profiler(); + + // Waits for a compute device to finish working to guarantee correct timings. + // Currently used exclusively when emitting profiling events denoting GPU work. + void WaitForDevice(Compute compute) const; + + void AnalyzeEventSequenceAndWriteResults(std::vector<ProfilingEvent>::const_iterator first, + std::vector<ProfilingEvent>::const_iterator last, + std::ostream& outStream) const; + + std::map<std::string, ProfilingEventStats> CalculateProfilingEventStats() const; + + std::stack<Marker> m_ObservedMarkers; + std::vector<ProfilingEvent> m_EventSequence; + std::uint32_t m_EventTag; + std::uint32_t m_NestingLevel; + bool m_EventTagUpdated; + + static Profiler s_Instance; +}; + +// Helper to easily add event markers to the codebase +class ScopedProfilingEvent +{ +public: + ScopedProfilingEvent(Compute compute, const std::string name) + : m_Compute(compute) + { + Profiler::Get().BeginEvent(compute, name); + } + + ~ScopedProfilingEvent() + { + Profiler::Get().EndEvent(m_Compute); + } + +private: + armnn::Compute m_Compute; +}; + +} // namespace armnn + +// Allows grouping events in an user-defined manner (e.g. per inference) +#define ARMNN_UPDATE_PROFILING_EVENT_TAG() armnn::Profiler::Get().UpdateEventTag(); + +// The event name must be known at compile time +#define ARMNN_SCOPED_PROFILING_EVENT(compute, name) armnn::ScopedProfilingEvent e_##__FILE__##__LINE__(compute, name); + +#else + +#define ARMNN_UPDATE_PROFILING_EVENT_TAG() +#define ARMNN_SCOPED_PROFILING_EVENT(compute, name) + +#endif // ARMNN_PROFILING_ENABLED + diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp new file mode 100644 index 0000000000..ea6d19bd31 --- /dev/null +++ b/src/armnn/Runtime.cpp @@ -0,0 +1,118 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "Runtime.hpp" + +#include "armnn/Version.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include <arm_compute/core/CL/OpenCL.h> +#include <arm_compute/core/CL/CLKernelLibrary.h> +#endif + +#include <boost/log/trivial.hpp> +#include <boost/polymorphic_cast.hpp> + +using namespace armnn; +using namespace std; + +namespace armnn +{ + +IRuntime* IRuntime::CreateRaw(const CreationOptions& options) +{ + return new Runtime(options); +} + +IRuntimePtr IRuntime::Create(const CreationOptions& options) +{ + return IRuntimePtr(CreateRaw(options), &IRuntime::Destroy); +} + +void IRuntime::Destroy(IRuntime* runtime) +{ + delete boost::polymorphic_downcast<Runtime*>(runtime); +} + +int Runtime::GenerateNetworkId() +{ + return m_NetworkIdCounter++; +} + +Status Runtime::LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr inNetwork) +{ + IOptimizedNetwork* rawNetwork = inNetwork.release(); + unique_ptr<LoadedNetwork> loadedNetwork = LoadedNetwork::MakeLoadedNetwork( + std::unique_ptr<OptimizedNetwork>(boost::polymorphic_downcast<OptimizedNetwork*>(rawNetwork)), + m_WorkloadFactories); + + if (!loadedNetwork) + { + return Status::Failure; + } + + networkIdOut = GenerateNetworkId(); + + // store the network + m_LoadedNetworks[networkIdOut] = std::move(loadedNetwork); + + return Status::Success; + +} + +Status Runtime::UnloadNetwork(NetworkId networkId) +{ + if (m_LoadedNetworks.erase(networkId) == 0) + { + BOOST_LOG_TRIVIAL(warning) << "WARNING: Runtime::UnloadNetwork(): " << networkId << " not found!"; + return Status::Failure; + } +#ifdef ARMCOMPUTECL_ENABLED + arm_compute::CLKernelLibrary::get().clear_programs_cache(); +#endif + BOOST_LOG_TRIVIAL(debug) << "Runtime::UnloadNetwork(): Unloaded network with ID: " << networkId; + return Status::Success; +} + +Runtime::Runtime(const CreationOptions& options) +: m_NetworkIdCounter(0) +{ + BOOST_LOG_TRIVIAL(info) << "ArmNN v" << ARMNN_VERSION << "\n"; + BOOST_LOG_TRIVIAL(info) << "Using compute device: " << options.m_DefaultComputeDevice << "\n"; + m_DeviceSpec.DefaultComputeDevice = options.m_DefaultComputeDevice; + + // If useCpuRefAsFallback is false, the reference workload factory will be prevented from creating + // operation workloads, unless the default compute device is precisely the reference backend. + m_WorkloadFactories.m_CpuRef = make_shared<RefWorkloadFactory>( + options.m_DefaultComputeDevice == Compute::CpuRef ? 
true : options.m_UseCpuRefAsFallback); + m_WorkloadFactories.m_CpuAcc = make_shared<NeonWorkloadFactory>(); + m_WorkloadFactories.m_GpuAcc = make_shared<ClWorkloadFactory>(); + + if (options.m_DefaultComputeDevice == Compute::GpuAcc) + { + m_WorkloadFactories.m_GpuAcc.get()->LoadOpenClRuntime(options.m_ClTunedParameters); + } +} + +TensorInfo Runtime::GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const +{ + LoadedNetwork* net = m_LoadedNetworks.at(networkId).get(); + return net->GetInputTensorInfo(layerId); +} + +TensorInfo Runtime::GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const +{ + const LoadedNetwork* net = m_LoadedNetworks.at(networkId).get(); + return net->GetOutputTensorInfo(layerId); +} + +Status Runtime::EnqueueWorkload(NetworkId networkId, + const InputTensors& inputTensors, + const OutputTensors& outputTensors) +{ + LoadedNetwork* loadedNetwork = m_LoadedNetworks.at(networkId).get(); + return loadedNetwork->EnqueueWorkload(inputTensors, outputTensors, m_WorkloadFactories); +} + +} diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp new file mode 100644 index 0000000000..d3f3a578f3 --- /dev/null +++ b/src/armnn/Runtime.hpp @@ -0,0 +1,73 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "LoadedNetwork.hpp" +#include "armnn/INetwork.hpp" +#include "armnn/IRuntime.hpp" +#include "armnn/Tensor.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/ClWorkloadFactory.hpp" + +#include <unordered_map> + +namespace armnn +{ + +struct WorkloadFactories +{ + std::shared_ptr<RefWorkloadFactory> m_CpuRef; + std::shared_ptr<NeonWorkloadFactory> m_CpuAcc; + std::shared_ptr<ClWorkloadFactory> m_GpuAcc; +}; + +class Runtime final : public IRuntime +{ +public: + /// Load a complete network into the Runtime. + /// @param [out] networkIdOut Unique identifier for the network is returned in this reference. + /// @param [in] network Complete network to load into the Runtime. + /// The runtime takes ownership of the network once passed in. + /// @return armnn::Status + virtual Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network) override; + + virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override; + virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override; + + // Evaluate network using input in inputTensors, outputs filled into outputTensors + virtual Status EnqueueWorkload(NetworkId networkId, + const InputTensors& inputTensors, + const OutputTensors& outputTensors) override; + + /// Unload a network from the Runtime. + /// At the moment this only removes the network from the m_Impl->m_Network. + /// This might need more work in the future to be AndroidNN compliant. + /// @param [in] networkId Unique identifier for the network to be unloaded. Generated in LoadNetwork(). + /// @return armnn::Status + virtual Status UnloadNetwork(NetworkId networkId) override; + + virtual const DeviceSpec& GetDeviceSpec() const override { return m_DeviceSpec; } + + /// Creates a runtime for workload execution. + /// May throw a ClRuntimeUnavailableException if @a defaultComputeDevice requires a CL runtime but + /// it cannot be setup for some reason. 
+ Runtime(const CreationOptions& options); + +private: + friend void RuntimeLoadedNetworksReserve(armnn::Runtime* runtime); // see RuntimeTests.cpp + + int GenerateNetworkId(); + + std::unordered_map<NetworkId, std::unique_ptr<LoadedNetwork>> m_LoadedNetworks; + + WorkloadFactories m_WorkloadFactories; + + int m_NetworkIdCounter; + + DeviceSpec m_DeviceSpec; +}; + +} diff --git a/src/armnn/Tensor.cpp b/src/armnn/Tensor.cpp new file mode 100644 index 0000000000..2e04c8c617 --- /dev/null +++ b/src/armnn/Tensor.cpp @@ -0,0 +1,187 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "armnn/Tensor.hpp" +#include "armnn/Utils.hpp" +#include "armnn/Exceptions.hpp" +#include "armnn/TypesUtils.hpp" + +#include <boost/assert.hpp> +#include <boost/log/trivial.hpp> +#include <boost/numeric/conversion/cast.hpp> + +namespace armnn +{ + +// --- +// --- TensorShape +// --- + +TensorShape::TensorShape() + : m_NumDimensions(0) +{ +} + +TensorShape::TensorShape(const unsigned int numDimensions, const unsigned int* const dimensionSizes) + : m_NumDimensions(numDimensions) +{ + if (numDimensions < 1) + { + throw InvalidArgumentException("Tensor numDimensions must be greater than 0"); + } + + if (numDimensions > MaxNumOfTensorDimensions) + { + throw InvalidArgumentException("Tensor numDimensions must be less than or equal to MaxNumOfTensorDimensions"); + } + + if (dimensionSizes == nullptr) + { + throw InvalidArgumentException("Tensor dimensionSizes must not be NULL"); + } + + std::copy(dimensionSizes, dimensionSizes + numDimensions, m_Dimensions.begin()); +} + +TensorShape::TensorShape(std::initializer_list<unsigned int> dimensionSizeList) + : TensorShape(boost::numeric_cast<unsigned int>(dimensionSizeList.size()), dimensionSizeList.begin()) +{ +} + +TensorShape::TensorShape(const TensorShape& other) + : m_NumDimensions(other.m_NumDimensions) +{ + std::copy(other.m_Dimensions.cbegin(), other.m_Dimensions.cbegin() + other.m_NumDimensions, m_Dimensions.begin()); +} + +TensorShape& TensorShape::operator =(const TensorShape& other) +{ + m_NumDimensions = other.m_NumDimensions; + std::copy(other.m_Dimensions.cbegin(), other.m_Dimensions.cbegin() + other.m_NumDimensions, m_Dimensions.begin()); + return *this; +} + +bool TensorShape::operator==(const TensorShape& other) const +{ + return ((m_NumDimensions == other.m_NumDimensions) && + std::equal(m_Dimensions.cbegin(), m_Dimensions.cbegin() + m_NumDimensions, other.m_Dimensions.cbegin())); +} + +bool TensorShape::operator!=(const TensorShape& other) const +{ + return !(*this == other); +} + +unsigned int TensorShape::GetNumElements() const +{ + if (m_NumDimensions == 0) + { + return 0; + } + + unsigned int count = 1; + for (unsigned int i = 0; i < m_NumDimensions; i++) + { + count *= m_Dimensions[i]; + } + + return count; +} + +// --- +// --- TensorInfo +// --- + +TensorInfo::TensorInfo() +: m_DataType(DataType::Float32) +{ +} + +TensorInfo::TensorInfo(const TensorShape& shape, DataType dataType, + float quantizationScale, int32_t quantizationOffset) + : m_Shape(shape) + , m_DataType(dataType) +{ + m_Quantization.m_Scale = quantizationScale; + m_Quantization.m_Offset = quantizationOffset; +} + +TensorInfo::TensorInfo(unsigned int numDimensions, const unsigned int* dimensionSizes, DataType dataType, + float quantizationScale, int32_t quantizationOffset) + : m_Shape(numDimensions, dimensionSizes) + , m_DataType(dataType) +{ + m_Quantization.m_Scale = quantizationScale; + 
m_Quantization.m_Offset = quantizationOffset; +} + +TensorInfo::TensorInfo(const TensorInfo& other) +: m_Shape(other.m_Shape) +, m_DataType(other.m_DataType) +, m_Quantization(other.m_Quantization) +{ +} + +TensorInfo& TensorInfo::operator=(const TensorInfo& other) +{ + m_Shape = other.m_Shape; + m_DataType = other.m_DataType; + m_Quantization = other.m_Quantization; + return *this; +} + +bool TensorInfo::operator==(const TensorInfo& other) const +{ + return ((m_Shape == other.m_Shape) && + (m_DataType == other.m_DataType) && + (m_Quantization == other.m_Quantization)); +} + +bool TensorInfo::operator!=(const TensorInfo& other) const +{ + return !(*this == other); +} + +unsigned int TensorInfo::GetNumBytes() const +{ + return GetDataTypeSize(m_DataType) * GetNumElements(); +} + +// --- +// --- BaseTensor +// --- + +template<typename MemoryType> +BaseTensor<MemoryType>::BaseTensor() + : m_MemoryArea(nullptr) +{ +} + +template<typename MemoryType> +BaseTensor<MemoryType>::BaseTensor(const TensorInfo& info, MemoryType memoryArea) + : m_MemoryArea(memoryArea) + , m_Info(info) +{ +} + +template<typename MemoryType> +BaseTensor<MemoryType>::BaseTensor(const BaseTensor<MemoryType>& other) + : m_MemoryArea(other.m_MemoryArea) + , m_Info(other.GetInfo()) +{ +} + +template<typename MemoryType> +BaseTensor<MemoryType>& BaseTensor<MemoryType>::operator =(const BaseTensor<MemoryType>& other) +{ + m_Info = other.m_Info; + m_MemoryArea = other.m_MemoryArea; + return *this; +} + +// Explicit instantiations +template class BaseTensor<const void*>; +template class BaseTensor<void*>; + +} // namespace armnn diff --git a/src/armnn/Utils.cpp b/src/armnn/Utils.cpp new file mode 100644 index 0000000000..fb8f4d6f72 --- /dev/null +++ b/src/armnn/Utils.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "armnn/Utils.hpp" +#include "Logging.hpp" + +#include <boost/log/core.hpp> + +namespace armnn +{ + +void ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity) +{ + armnnUtils::ConfigureLogging(boost::log::core::get().get(), printToStandardOutput, printToDebugOutput, severity); +} + +// Default to logging completely disabled. +// The user of the library must enable it if they want by calling armnn::ConfigureLogging(). +struct DefaultLoggingConfiguration +{ + DefaultLoggingConfiguration() + { + ConfigureLogging(false, false, LogSeverity::Trace); + } +}; + +static DefaultLoggingConfiguration g_DefaultLoggingConfiguration; + +}
\ No newline at end of file diff --git a/src/armnn/backends/ArmComputeTensorUtils.cpp b/src/armnn/backends/ArmComputeTensorUtils.cpp new file mode 100644 index 0000000000..9f21c41a2f --- /dev/null +++ b/src/armnn/backends/ArmComputeTensorUtils.cpp @@ -0,0 +1,131 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "ArmComputeTensorUtils.hpp" +#include "ArmComputeUtils.hpp" + +#include <armnn/Descriptors.hpp> + +namespace armnn +{ +namespace armcomputetensorutils +{ + +arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType) +{ + switch(dataType) + { + case armnn::DataType::Float32: + { + return arm_compute::DataType::F32; + } + case armnn::DataType::QuantisedAsymm8: + { + return arm_compute::DataType::QASYMM8; + } + case armnn::DataType::Signed32: + { + return arm_compute::DataType::S32; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + return arm_compute::DataType::UNKNOWN; + } + } +} + +arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape) +{ + arm_compute::TensorShape shape; + + // armnn tensors are (batch, channels, height, width) + // arm_compute tensors are (width, height, channels, batch) + for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++) + { + // note that our dimensions are stored in the opposite order to ACL's + shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i]); + + // TensorShape::set() flattens leading ones, so that batch size 1 cannot happen. + // arm_compute tensors expect this + } + + // prevent arm_compute issue where tensor is flattened to nothing + if (shape.num_dimensions() == 0) + { + shape.set_num_dimensions(1); + } + + return shape; +} + +// Utility function used to build a TensorInfo object, that can be used to initialise +// ARM Compute Tensor and CLTensor allocators. 
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo) +{ + const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape()); + const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType()); + const arm_compute::QuantizationInfo aclQuantizationInfo(tensorInfo.GetQuantizationScale(), + tensorInfo.GetQuantizationOffset()); + + return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo); +} + +arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor) +{ + using arm_compute::PoolingType; + using arm_compute::DimensionRoundingType; + using arm_compute::PadStrideInfo; + using arm_compute::PoolingLayerInfo; + + // Resolve ARM Compute layer parameters + const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType); + const DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType( + descriptor.m_OutputShapeRounding); + + const PadStrideInfo padStrideInfo(descriptor.m_StrideX, + descriptor.m_StrideY, + descriptor.m_PadLeft, + descriptor.m_PadRight, + descriptor.m_PadTop, + descriptor.m_PadBottom, + rounding); + + const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude); + + return arm_compute::PoolingLayerInfo(poolingType, descriptor.m_PoolWidth, padStrideInfo, excludePadding); +} + +arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor) +{ + const arm_compute::NormType normType = + ConvertNormalizationAlgorithmChannelToAclNormType(descriptor.m_NormChannelType); + return arm_compute::NormalizationLayerInfo(normType, + descriptor.m_NormSize, + descriptor.m_Alpha, + descriptor.m_Beta, + descriptor.m_K, + false); +} + +arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm) +{ + arm_compute::PermutationVector aclPerm; + + unsigned int start = 0; + while ((start == perm[start]) && (start < perm.GetSize())) + { + ++start; + } + + for (unsigned int i = start; i < perm.GetSize(); ++i) + { + aclPerm.set(i - start, perm[i] - start); + } + + return aclPerm; +} + +} // namespace armcomputetensorutils +} // namespace armnn diff --git a/src/armnn/backends/ArmComputeTensorUtils.hpp b/src/armnn/backends/ArmComputeTensorUtils.hpp new file mode 100644 index 0000000000..9a13caf495 --- /dev/null +++ b/src/armnn/backends/ArmComputeTensorUtils.hpp @@ -0,0 +1,146 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/Tensor.hpp> +#include <armnn/DescriptorsFwd.hpp> + +#include <arm_compute/core/ITensor.h> +#include <arm_compute/core/TensorInfo.h> + +#include <boost/cast.hpp> + +namespace armnn +{ +class ITensorHandle; + +namespace armcomputetensorutils +{ + +/// Utility function to map an armnn::DataType to corresponding arm_compute::DataType +arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType); + +/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape +arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape); + +/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given +/// armnn::ITensorInfo +arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo); + +/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor +arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor); + +/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor +arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc); + +/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector +arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector); + +/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor. +template <typename Tensor> +void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo) +{ + tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo)); +} + +template <typename Tensor> +void InitialiseArmComputeTensorEmpty(Tensor& tensor) +{ + tensor.allocator()->allocate(); +} + +// Helper function to obtain byte offset into tensor data +inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info, + uint32_t batchIndex, + uint32_t channelIndex, + uint32_t y, + uint32_t x) +{ + arm_compute::Coordinates coords; + coords.set(3, boost::numeric_cast<int>(batchIndex)); + coords.set(2, boost::numeric_cast<int>(channelIndex)); + coords.set(1, boost::numeric_cast<int>(y)); + coords.set(0, boost::numeric_cast<int>(x)); + return info.offset_element_in_bytes(coords); +} + +// Helper function to obtain element offset into data buffer representing tensor data (assuming no strides) +inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info, + uint32_t batchIndex, + uint32_t channelIndex, + uint32_t y, + uint32_t x) +{ + const arm_compute::TensorShape& shape = info.tensor_shape(); + uint32_t width = boost::numeric_cast<uint32_t>(shape[0]); + uint32_t height = boost::numeric_cast<uint32_t>(shape[1]); + uint32_t numChannels = boost::numeric_cast<uint32_t>(shape[2]); + return ((batchIndex * numChannels + channelIndex) * height + y) * width + x; +} + +template <typename T> +void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData) +{ + // if MaxNumOfTensorDimensions is increased, this loop will need fixing + static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); + { + const arm_compute::ITensorInfo& info = *srcTensor.info(); + const arm_compute::TensorShape& shape = info.tensor_shape(); + const uint8_t* const bufferPtr = srcTensor.buffer(); + uint32_t width = boost::numeric_cast<uint32_t>(shape[0]); + uint32_t height = 
boost::numeric_cast<uint32_t>(shape[1]); + uint32_t numChannels = boost::numeric_cast<uint32_t>(shape[2]); + uint32_t numBatches = boost::numeric_cast<uint32_t>(shape[3]); + + for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) + { + for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex) + { + for (unsigned int y = 0; y < height; ++y) + { + // Copy one row from arm_compute tensor buffer to linear memory buffer + // A row is the largest contiguous region we can copy, as the tensor data may be using strides + memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), + bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), + width * sizeof(T)); + } + } + } + } +} + +template <typename T> +void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor) +{ + // if MaxNumOfTensorDimensions is increased, this loop will need fixing + static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); + { + const arm_compute::ITensorInfo& info = *dstTensor.info(); + const arm_compute::TensorShape& shape = info.tensor_shape(); + uint8_t* const bufferPtr = dstTensor.buffer(); + uint32_t width = boost::numeric_cast<uint32_t>(shape[0]); + uint32_t height = boost::numeric_cast<uint32_t>(shape[1]); + uint32_t numChannels = boost::numeric_cast<uint32_t>(shape[2]); + uint32_t numBatches = boost::numeric_cast<uint32_t>(shape[3]); + + for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) + { + for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex) + { + for (unsigned int y = 0; y < height; ++y) + { + // Copy one row from linear memory buffer to arm_compute tensor buffer + // A row is the largest contiguous region we can copy, as the tensor data may be using strides + memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), + srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), + width * sizeof(T)); + } + } + } + } +} + +} // namespace armcomputetensorutils +} // namespace armnn diff --git a/src/armnn/backends/ArmComputeUtils.hpp b/src/armnn/backends/ArmComputeUtils.hpp new file mode 100644 index 0000000000..c451e6434b --- /dev/null +++ b/src/armnn/backends/ArmComputeUtils.hpp @@ -0,0 +1,117 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#if ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED + +#include <armnn/Tensor.hpp> +#include <armnn/Descriptors.hpp> + +#include <arm_compute/core/Types.h> + +namespace armnn +{ + +inline arm_compute::NormalizationLayerInfo +CreateAclNormalizationLayerInfoForL2Normalization(const armnn::TensorInfo& tensorInfo) +{ + const unsigned int depth = tensorInfo.GetShape()[1]; + + // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of + // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose + // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully chosing the normalization + // parameters. + // + // Please refer to both the reference implementation of the normalization layer and the implementation of + // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below. + + // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd. 
+ // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in + // ACL's normalization_layer_cross_map() CL function. + const uint32_t normSize = depth * 2u + 1u; + + // See ACL's NormalizationLayerInfo::scale_coeff() definition. + // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead. + const float alpha = 1.0f; + + // Don't offset the reduction + const float kappa = 0.0f; + + // pow(reduction, -0.5) = 1 / sqrt(reduction) + const float beta = 0.5f; + + return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false); +} + +inline arm_compute::ActivationLayerInfo::ActivationFunction +ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction) +{ + using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction; + + switch (armnnFunction) + { + case ActivationFunction::Linear: return AclActivationFunction::LINEAR; + // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function + case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC; + case ActivationFunction::ReLu: return AclActivationFunction::RELU; + case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU; + case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU; + case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU; + case ActivationFunction::Abs: return AclActivationFunction::ABS; + case ActivationFunction::Sqrt: return AclActivationFunction::SQRT; + case ActivationFunction::Square: return AclActivationFunction::SQUARE; + case ActivationFunction::TanH: return AclActivationFunction::TANH; + default: throw InvalidArgumentException("Unsupported activation function"); + } +} + +inline arm_compute::ActivationLayerInfo +ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor& actDesc) +{ + return arm_compute::ActivationLayerInfo(ConvertActivationFunctionToAclActivationFunction(actDesc.m_Function), + actDesc.m_A, actDesc.m_B); +} + +inline arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType(PoolingAlgorithm poolingAlgorithm) +{ + using arm_compute::PoolingType; + + switch (poolingAlgorithm) + { + case PoolingAlgorithm::Max: return PoolingType::MAX; + case PoolingAlgorithm::Average: return PoolingType::AVG; + case PoolingAlgorithm::L2: return PoolingType::L2; + default: throw InvalidArgumentException("Unsupported pooling algorithm"); + } +} + +inline arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType(OutputShapeRounding + rounding) +{ + using arm_compute::DimensionRoundingType; + + switch (rounding) + { + case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL; + case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR; + default: throw InvalidArgumentException("Unsupported Output Shape Rounding type"); + } +} + +inline arm_compute::NormType +ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel channelType) +{ + using arm_compute::NormType; + switch (channelType) + { + case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP; + case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D; + default: throw InvalidArgumentException("Unsupported normalization algorithm channel type"); + } +} + +} + +#endif // ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED diff --git 
a/src/armnn/backends/ClLayerSupport.cpp b/src/armnn/backends/ClLayerSupport.cpp new file mode 100644 index 0000000000..5f0e4ea622 --- /dev/null +++ b/src/armnn/backends/ClLayerSupport.cpp @@ -0,0 +1,405 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "LayerSupportCommon.hpp" + +#include "ClLayerSupport.hpp" +#include "InternalTypes.hpp" + +#include <armnn/Descriptors.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +#include <boost/core/ignore_unused.hpp> + +#ifdef ARMCOMPUTECL_ENABLED +#include "ClWorkloads/ClAdditionFloat32Workload.hpp" +#include "ClWorkloads/ClPooling2dBaseWorkload.hpp" +#include "ClWorkloads/ClPermuteWorkload.hpp" +#include "ClWorkloads/ClNormalizationFloat32Workload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ +namespace +{ +template<unsigned int FilterSize> +bool IsMatchingSize2d(const TensorInfo& weightInfo) +{ + // Width & Height must match + return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); +} + +template<uint32_t ValidStride> +bool IsMatchingStride(uint32_t actualStride) +{ + return ValidStride == actualStride; +} + +template<uint32_t FirstStride, uint32_t SecondStride, uint32_t... ValidStrides> +bool IsMatchingStride(uint32_t actualStride) +{ + return IsMatchingStride<FirstStride>(actualStride) || IsMatchingStride<SecondStride, ValidStrides...>(actualStride); +}; + +bool IsClBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTECL_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without CL support"; + } + return false; +#endif +} + +#if ARMCOMPUTECL_ENABLED +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) +#else +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) +#endif + +#if ARMCOMPUTECL_ENABLED +template<class FuncType, class... Args> +inline bool IsWorkloadSupported(FuncType&& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward<Args>(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) 
\ + return IsClBackendSupported(reasonIfUnsupported); +#endif + +} //namespace + +bool IsClActivationUint8Supported(std::string* reasonIfUnsupported, const ActivationDescriptor& parameters) +{ + if (parameters.m_Function != ActivationFunction::BoundedReLu) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Unsupported activation function, only BoundedReLu is supported"; + } + + return false; + } + + return true; +} + +bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights) +{ + if (weights.GetNumDimensions() != 4) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Depthwise convolution weight tensor needs to be 4d"; + } + return false; + } + // weights.GetShape()[0] = channel multiplier + if (weights.GetShape()[0] != 1) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Channel multiplier only supports the value 1 in the CL backend"; + } + return false; + } + else if ((weights.GetDataType() == armnn::DataType::QuantisedAsymm8) && !IsMatchingSize2d<3>(weights)) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "CL backend only supports 3x3 filtering for Depthwise Convolution on 8-bit"; + } + return false; + } + + return true; +} + +template<typename Float32Func, typename Uint8Func, typename ... Params> +bool IsSupportedForDataTypeCl(std::string* reasonIfUnsupported, + DataType dataType, + Float32Func floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... params) +{ + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + uint8FuncPtr, + std::forward<Params>(params)...); +} + +bool IsActivationSupportedCl(const TensorInfo& input, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<const ActivationDescriptor&>, + &IsClActivationUint8Supported, + descriptor); +} + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClAdditionFloat32Workload::IsSupported(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<const BatchNormalizationDescriptor&>, + &FalseFuncU8<const BatchNormalizationDescriptor&>, + descriptor); +} + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + bool isSupported = false; + + bool strideXIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideX); + bool strideXIsThree = IsMatchingStride<3>(desc.m_StrideX); + + bool strideYIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideY); + bool strideYIsThree = IsMatchingStride<3>(desc.m_StrideY); + + bool strideIsOneOrTwo = strideXIsOneOrTwo && strideYIsOneOrTwo; + bool strideIsOneOrTwoOrThree = ( strideXIsOneOrTwo || strideXIsThree ) && ( strideYIsOneOrTwo || strideYIsThree ); + + // 1x1 convolution with strides of 1,2,3 + isSupported |= 
IsMatchingSize2d<1>(weightInfo) && ( strideIsOneOrTwoOrThree ); + + // 3x3 convolution with strides of 1,2 + isSupported |= IsMatchingSize2d<3>(weightInfo) && ( strideIsOneOrTwo ); + + // 5x5 convolution with strides of 1,2 + isSupported |= IsMatchingSize2d<5>(weightInfo) && ( strideIsOneOrTwo ); + + //fall back to normal convolution for the asymmetric padding case. + if (desc.m_PadLeft != desc.m_PadRight || + desc.m_PadTop != desc.m_PadBottom) + { + //direct convolution does not support asymmetric padding yet. + isSupported = false; + } + + return isSupported; +} + +bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported, + const Convolution2dDescriptor& parameters, + const TensorInfo& weightInfo) +{ + return IsClDirectConvolution2dSupported(weightInfo, parameters); +} + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<decltype(descriptor), decltype(weights)>, + &IsDirectConvolution2dParamsSupportedCl, + descriptor, + weights); +} + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &IsClDepthwiseConvolution2dDescParamsSupported, + &IsClDepthwiseConvolution2dDescParamsSupported, + descriptor, + weights); +} + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + 
ignore_unused(output); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, descriptor); +} + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return false; +} + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return true; +} + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +} diff --git a/src/armnn/backends/ClLayerSupport.hpp b/src/armnn/backends/ClLayerSupport.hpp new file mode 100644 index 0000000000..f5b5ae8b15 --- /dev/null +++ b/src/armnn/backends/ClLayerSupport.hpp @@ -0,0 +1,102 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +namespace armnn +{ +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); +bool IsClActivationUint8Supported(std::string* reasonIfUnsupported, const ActivationDescriptor& parameters); +bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsActivationSupportedCl(const TensorInfo& input, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); +} diff --git 
a/src/armnn/backends/ClTensorHandle.hpp b/src/armnn/backends/ClTensorHandle.hpp new file mode 100644 index 0000000000..49e18dad59 --- /dev/null +++ b/src/armnn/backends/ClTensorHandle.hpp @@ -0,0 +1,86 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "OutputHandler.hpp" +#include "ArmComputeTensorUtils.hpp" + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Coordinates.h> + + +namespace armnn +{ + + +class IClTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ICLTensor& GetTensor() = 0; + virtual arm_compute::ICLTensor const& GetTensor() const = 0; + virtual void Map(bool blocking = true) = 0; + virtual void UnMap() = 0; + virtual arm_compute::DataType GetDataType() const = 0; +}; + +class ClTensorHandle : public IClTensorHandle +{ +public: + ClTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + arm_compute::CLTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);}; + + virtual void Map(bool blocking = true) override {m_Tensor.map(blocking);} + virtual void UnMap() override { m_Tensor.unmap();} + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL;} + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + +private: + arm_compute::CLTensor m_Tensor; + +}; + +class ClSubTensorHandle : public IClTensorHandle +{ +public: + ClSubTensorHandle(arm_compute::ICLTensor& parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent, shape, coords) + { + } + + arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override {}; + + virtual void Map(bool blocking = true) override {m_Tensor.map(blocking);} + virtual void UnMap() override { m_Tensor.unmap();} + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL;} + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + +private: + arm_compute::CLSubTensor m_Tensor; + +}; + +}
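To make the data flow these handles support concrete, the following is a minimal sketch of allocating a ClTensorHandle and reading it back through the row-by-row copy helper from ArmComputeTensorUtils.hpp. The shape, data and wrapper function are illustrative only, and it assumes the CL runtime has already been initialised (see ClWorkloadFactory::LoadOpenClRuntime further down).

#include "backends/ClTensorHandle.hpp"
#include <vector>

// Illustrative only: build a handle for a 1x3x2x2 Float32 tensor, allocate the CL buffer,
// then copy its contents into contiguous host memory while it is mapped.
void ReadBackClTensorExample()
{
    const unsigned int dims[] = { 1, 3, 2, 2 };
    const armnn::TensorInfo tensorInfo(4, dims, armnn::DataType::Float32);

    armnn::ClTensorHandle handle(tensorInfo);
    handle.Allocate(); // InitialiseArmComputeTensorEmpty()

    std::vector<float> hostData(tensorInfo.GetNumElements());

    handle.Map(true);  // blocking map of the CL buffer
    // Copies one row at a time, so any strides/padding ACL added to the CL tensor are respected.
    armnn::armcomputetensorutils::CopyArmComputeITensorData(handle.GetTensor(), hostData.data());
    handle.UnMap();
}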
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp new file mode 100644 index 0000000000..4e565a05d7 --- /dev/null +++ b/src/armnn/backends/ClWorkloadFactory.cpp @@ -0,0 +1,473 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "ClWorkloadFactory.hpp" + +#include "armnn/Exceptions.hpp" +#include "armnn/Utils.hpp" + +#include <string> +#include "CpuTensorHandle.hpp" +#include "Layer.hpp" +#include "Layers.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <arm_compute/runtime/CL/CLScheduler.h> +#include "backends/MemCopyWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "ClWorkloads.hpp" +#endif + +#include "MakeWorkloadHelper.hpp" + +#include <boost/polymorphic_cast.hpp> +#include <boost/format.hpp> + +namespace armnn +{ + +bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTECL_ENABLED + +void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters) +{ + ClTunedParameters* clTunedParametersImpl = boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters); + + cl::Device device; + cl::Context context; + cl::CommandQueue commandQueue; + + try + { + device = cl::Device::getDefault(); + context = cl::Context::getDefault(); + + bool enableProfiling = false; +#if ARMNN_PROFILING_ENABLED + enableProfiling = true; +#endif + if (clTunedParametersImpl && clTunedParametersImpl->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters) + { + enableProfiling = true; // Needed for the CLTuner to work. + } + + if (enableProfiling) + { + // Create a new queue with profiling enabled + commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); + } + else + { + // Use default queue + commandQueue = cl::CommandQueue::getDefault(); + } + } + catch (const cl::Error& clError) + { + throw ClRuntimeUnavailableException(boost::str(boost::format( + "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" + ) % clError.what() % clError.err())); + } + + // Note: the first argument (the path to the CL source code) is ignored, as the kernels should be embedded in the ArmCompute library. 
+ arm_compute::CLKernelLibrary::get().init(".", context, device); + + arm_compute::ICLTuner* tuner = nullptr; + if (clTunedParameters) + { + tuner = &clTunedParametersImpl->m_Tuner; + } + arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner); +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return std::make_unique<ClTensorHandle>(tensorInfo); +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::CL); + + arm_compute::Coordinates coords; + arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // arm compute indexes tensor coords in reverse order + unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); + } + + return std::make_unique<ClSubTensorHandle>(static_cast<ClTensorHandle&>(parent).GetTensor(), shape, coords); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyFromCpuToClFloat32Workload, CopyFromCpuToClUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyFromClToCpuFloat32Workload, CopyFromClToCpuUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClActivationFloat32Workload, ClActivationUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClSoftmaxFloat32Workload, ClSoftmaxUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClSplitterFloat32Workload, ClSplitterUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClMergerFloat32Workload, ClMergerUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClFullyConnectedFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClPermuteFloat32Workload, ClPermuteUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClPooling2dFloat32Workload, ClPooling2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> 
ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClConvolution2dFloat32Workload, ClConvolution2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClDepthwiseConvolutionFloat32Workload, ClDepthwiseConvolutionUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClNormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClAdditionFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClMultiplicationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<ClBatchNormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); + } + + // Create a workload that will copy tensor data from the inputs, which can have a number of different formats, + // to CL tensors. 
+ switch (descriptor.m_Inputs[0]->GetType()) + { + case ITensorHandle::Cpu: + return MakeWorkload<CopyFromCpuToClFloat32Workload, CopyFromCpuToClUint8Workload>(descriptor, info); +#if ARMCOMPUTENEON_ENABLED + case ITensorHandle::Neon: + { + return MakeWorkload<CopyFromNeonToClFloat32Workload, CopyFromNeonToClUint8Workload>(descriptor, info); + } +#endif + default: + throw InvalidArgumentException("ClWorkloadFactory: Destination type not supported for MemCopy Workload."); + } +} + +std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClResizeBilinearFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClL2NormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClConstantFloat32Workload, ClConstantUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClReshapeFloat32Workload, ClReshapeUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<ClFloorFloat32Workload, NullWorkload>(descriptor, info); +} + +#else // #if ARMCOMPUTECL_ENABLED + +void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters) +{ + // No CL support +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<armnn::IWorkload> 
ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +#endif // #if ARMCOMPUTECL_ENABLED + +armnn::IClTunedParameters* IClTunedParameters::CreateRaw(armnn::IClTunedParameters::Mode mode) +{ + return new ClTunedParameters(mode); +} + +armnn::IClTunedParametersPtr IClTunedParameters::Create(armnn::IClTunedParameters::Mode mode) +{ + return IClTunedParametersPtr(CreateRaw(mode), &IClTunedParameters::Destroy); +} + +void IClTunedParameters::Destroy(IClTunedParameters* params) +{ + delete params; +} + +ClTunedParameters::ClTunedParameters(armnn::IClTunedParameters::Mode mode) + : m_Mode(mode) +#ifdef ARMCOMPUTECL_ENABLED + , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) +#endif +{ +} + +void ClTunedParameters::Load(const char* filename) +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.load_from_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + + e.what()); + } +#endif +} + +void ClTunedParameters::Save(const 
char* filename) const +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.save_to_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + + e.what()); + } +#endif +} + +} // namespace armnn diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp new file mode 100644 index 0000000000..2477e23eeb --- /dev/null +++ b/src/armnn/backends/ClWorkloadFactory.hpp @@ -0,0 +1,129 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "WorkloadFactory.hpp" +#include "OutputHandler.hpp" +#include "armnn/IRuntime.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include <arm_compute/runtime/CL/CLTuner.h> +#endif + +namespace cl +{ +class Context; +class CommandQueue; +class Device; +} + +namespace armnn +{ + +class IClTunedParameters; + +// ARM Compute OpenCL workload factory +class ClWorkloadFactory : public IWorkloadFactory +{ +public: + virtual ~ClWorkloadFactory(){}; + + virtual Compute GetCompute() const override { return Compute::GpuAcc; } + + static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported); + + void LoadOpenClRuntime(IClTunedParameters* clTunedParameters = nullptr); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& 
descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; +}; + +class ClTunedParameters : public IClTunedParameters +{ +public: + ClTunedParameters(armnn::IClTunedParameters::Mode mode); + + virtual void Load(const char* filename); + virtual void Save(const char* filename) const; + + Mode m_Mode; + +#ifdef ARMCOMPUTECL_ENABLED + arm_compute::CLTuner m_Tuner; +#endif +}; + +} // namespace armnn diff --git a/src/armnn/backends/ClWorkloadUtils.hpp b/src/armnn/backends/ClWorkloadUtils.hpp new file mode 100644 index 0000000000..549a0bbc25 --- /dev/null +++ b/src/armnn/backends/ClWorkloadUtils.hpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Workload.hpp" +#include <arm_compute/core/CL/OpenCL.h> +#include <arm_compute/runtime/CL/CLFunctions.h> +#include <arm_compute/runtime/SubTensor.h> +#include "ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +template <typename T> +void CopyArmComputeClTensorData(const T* srcData, arm_compute::CLTensor& dstTensor) +{ + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "MapClTensorForWriting"); + dstTensor.map(true); + } + + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyToClTensor"); + armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor); + } + + dstTensor.unmap(); +} + +template <typename T> +void InitialiseArmComputeClTensorData(arm_compute::CLTensor& clTensor, const T* data) +{ + armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor); + CopyArmComputeClTensorData<T>(data, clTensor); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads.hpp b/src/armnn/backends/ClWorkloads.hpp new file mode 100644 index 0000000000..3b8cf50ace --- /dev/null +++ b/src/armnn/backends/ClWorkloads.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once +#include "backends/ClWorkloads/ClActivationFloat32Workload.hpp" +#include "backends/ClWorkloads/ClActivationUint8Workload.hpp" +#include "backends/ClWorkloads/ClAdditionFloat32Workload.hpp" +#include "backends/ClWorkloads/ClBaseConstantWorkload.hpp" +#include "backends/ClWorkloads/ClBaseMergerWorkload.hpp" +#include "backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp" +#include "backends/ClWorkloads/ClConstantFloat32Workload.hpp" +#include "backends/ClWorkloads/ClConstantUint8Workload.hpp" +#include "backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp" +#include "backends/ClWorkloads/ClConvolution2dUint8Workload.hpp" +#include "backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp" +#include "backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp" +#include "backends/ClWorkloads/ClFloorFloat32Workload.hpp" +#include "backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp" +#include "backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp" +#include "backends/ClWorkloads/ClMergerFloat32Workload.hpp" +#include "backends/ClWorkloads/ClMergerUint8Workload.hpp" +#include "backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp" +#include "backends/ClWorkloads/ClNormalizationFloat32Workload.hpp" +#include "backends/ClWorkloads/ClPermuteWorkload.hpp" +#include "backends/ClWorkloads/ClPooling2dFloat32Workload.hpp" +#include "backends/ClWorkloads/ClPooling2dUint8Workload.hpp" +#include "backends/ClWorkloads/ClReshapeFloat32Workload.hpp" +#include "backends/ClWorkloads/ClReshapeUint8Workload.hpp" +#include "backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp" +#include "backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp" +#include "backends/ClWorkloads/ClSoftmaxUint8Workload.hpp" +#include "backends/ClWorkloads/ClSplitterFloat32Workload.hpp" +#include "backends/ClWorkloads/ClSplitterUint8Workload.hpp"
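ClWorkloadFactory, declared a little further up, is the entry point the runtime uses for the GpuAcc backend, so a short usage sketch follows. The tuned-parameters file name is hypothetical, and calling Load() through the IClTunedParameters interface is inferred from the ClTunedParameters override rather than shown in this change.

#include "backends/ClWorkloadFactory.hpp"
#include <memory>

// Illustrative only: initialise the CL runtime with a CLTuner parameter file and
// create a tensor handle through the factory.
void BringUpClBackendExample(const armnn::TensorInfo& tensorInfo)
{
    armnn::IClTunedParametersPtr tunedParams =
        armnn::IClTunedParameters::Create(armnn::IClTunedParameters::Mode::UpdateTunedParameters);
    tunedParams->Load("cl_tuned_params.bin"); // hypothetical path; throws armnn::Exception on failure

    armnn::ClWorkloadFactory factory;
    factory.LoadOpenClRuntime(tunedParams.get()); // passing nullptr (the default) skips the tuner

    std::unique_ptr<armnn::ITensorHandle> handle = factory.CreateTensorHandle(tensorInfo);
    handle->Allocate();
}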
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp new file mode 100644 index 0000000000..fb5d78425e --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClActivationFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" + +namespace armnn +{ + +ClActivationFloat32Workload::ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<ActivationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClActivationFloat32Workload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void ClActivationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationFloat32Workload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn
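The workload above leans on ConvertActivationDescriptorToAclActivationLayerInfo from ArmComputeUtils.hpp; a small sketch of that conversion in isolation is below. The parameter values are made up, and reading m_A/m_B as the upper/lower bound of BoundedReLu is an assumption rather than something stated in this change.

#include "backends/ArmComputeUtils.hpp"

// Illustrative only: build the ACL ActivationLayerInfo for a bounded ReLU descriptor.
arm_compute::ActivationLayerInfo MakeBoundedReluLayerInfoExample()
{
    armnn::ActivationDescriptor desc;
    desc.m_Function = armnn::ActivationFunction::BoundedReLu;
    desc.m_A = 6.0f; // assumed upper bound
    desc.m_B = 0.0f; // assumed lower bound

    // BoundedReLu maps to LU_BOUNDED_RELU; m_A and m_B are passed through unchanged.
    return armnn::ConvertActivationDescriptorToAclActivationLayerInfo(desc);
}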
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp new file mode 100644 index 0000000000..9bab4202be --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +// Activation layer execution +class ClActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor> +{ +public: + ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp new file mode 100644 index 0000000000..3671dd7187 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClActivationUint8Workload.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +namespace armnn +{ + +ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ActivationQueueDescriptor>(descriptor, info) +{ + + std::string reasonIfUnsupported; + if (!IsClActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters)) + { + throw InvalidArgumentException(reasonIfUnsupported); + } + + // Only BoundedReLu is supported (see IsClActivationUint8Supported) + arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void ClActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp new file mode 100644 index 0000000000..3a9cceb298 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +// Activation layer execution +class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> +{ +public: + ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp new file mode 100644 index 0000000000..153167f172 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp @@ -0,0 +1,57 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClAdditionFloat32Workload.hpp" + +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClAdditionFloat32Workload::ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<AdditionQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClAdditionFloat32Workload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input0, &input1, &output, ms_AclConvertPolicy); +} + +void ClAdditionFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClAdditionFloat32Workload_Execute"); + m_Layer.run(); +} + +bool ClAdditionFloat32Workload::IsSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = decltype(m_Layer)::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + ms_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn
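ClAdditionFloat32Workload::IsSupported is what IsAdditionSupportedCl in ClLayerSupport.cpp forwards to, so callers can ask for a reason string before building the workload; a minimal caller-side sketch follows (the reporting function and stream output are illustrative).

#include "backends/ClLayerSupport.hpp"
#include <iostream>
#include <string>

// Illustrative only: query CL support for an addition and surface the ACL reason on failure.
bool CheckAdditionSupportExample(const armnn::TensorInfo& input0,
                                 const armnn::TensorInfo& input1,
                                 const armnn::TensorInfo& output)
{
    std::string reason;
    const bool supported = armnn::IsAdditionSupportedCl(input0, input1, output, &reason);
    if (!supported)
    {
        // 'reason' holds the arm_compute::Status description, or a note that the
        // library was built without CL support.
        std::cerr << "Addition not supported on GpuAcc: " << reason << std::endl;
    }
    return supported;
}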
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp new file mode 100644 index 0000000000..37e50c2c86 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor> +{ +public: + ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + + static bool IsSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +private: + mutable arm_compute::CLArithmeticAddition m_Layer; + static constexpr arm_compute::ConvertPolicy ms_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp new file mode 100644 index 0000000000..4b72d92d72 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClBaseConstantWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +template class ClBaseConstantWorkload<DataType::Float32>; +template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>; + +template<armnn::DataType dataType> +void ClBaseConstantWorkload<dataType>::Execute() const +{ + // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data + // on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not + // have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor(); + + switch (dataType) + { + case DataType::Float32: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<float>(), output); + break; + } + case DataType::QuantisedAsymm8: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } +} + + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp new file mode 100644 index 0000000000..660842f375 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ +template <armnn::DataType DataType> +class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType> +{ +public: + ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info) + , m_RanOnce(false) + { + } + + void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp new file mode 100644 index 0000000000..7542c62b47 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +// Base class template providing an implementation of the Merger layer common to all data types +template <armnn::DataType DataType> +class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType> +{ +public: + using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload; + + void Execute() const override + { + // With subtensors, merger is a no-op + } +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..fef841ced2 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types +template <armnn::DataType DataType> +class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType> +{ +public: + using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload; + + void Execute() const override + { + // With subtensors, splitter is a no-op + } +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..dabd495d59 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClBatchNormalizationFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClBatchNormalizationFloat32Workload::ClBatchNormalizationFloat32Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info) +{ + BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo()); + BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo()); + BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Data.ValidateInputsOutputs("ClBatchNormalizationFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps); + + InitialiseArmComputeClTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>()); + InitialiseArmComputeClTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>()); + InitialiseArmComputeClTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>()); + InitialiseArmComputeClTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>()); +} + +void ClBatchNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClBatchNormalizationFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn
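ClBatchNormalizationFloat32Workload only wires mean, variance, gamma, beta and epsilon into CLBatchNormalizationLayer. As a reminder of what that layer computes per element in inference mode, a scalar reference sketch (not the ACL implementation; broadcasting of the per-channel parameters over the tensor is omitted):

#include <cmath>

// y = gamma * (x - mean) / sqrt(variance + eps) + beta, applied with per-channel parameters.
float BatchNormReference(float x, float mean, float variance, float gamma, float beta, float eps)
{
    return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
}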
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..ddbd0f05c0 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor> +{ +public: + ClBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload; + void Execute() const override; + +private: + mutable arm_compute::CLBatchNormalizationLayer m_Layer; + + arm_compute::CLTensor m_Mean; + arm_compute::CLTensor m_Variance; + arm_compute::CLTensor m_Gamma; + arm_compute::CLTensor m_Beta; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp new file mode 100644 index 0000000000..99880d68a7 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClConstantFloat32Workload.hpp" +namespace armnn +{ + +void ClConstantFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantFloat32Workload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp new file mode 100644 index 0000000000..5f86d3b2b6 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ +class ClConstantFloat32Workload : public ClBaseConstantWorkload<DataType::Float32> +{ +public: + using ClBaseConstantWorkload<DataType::Float32>::ClBaseConstantWorkload; + void Execute() const override; +}; + + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp new file mode 100644 index 0000000000..078d4261fa --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClConstantUint8Workload.hpp" +namespace armnn +{ + +void ClConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantUint8Workload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp new file mode 100644 index 0000000000..3a53f1011e --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ + +class ClConstantUint8Workload : public ClBaseConstantWorkload<DataType::QuantisedAsymm8> +{ +public: + using ClBaseConstantWorkload<DataType::QuantisedAsymm8>::ClBaseConstantWorkload; + void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..6f4069bcc0 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClConvolution2dFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info) +{ + + // todo: check tensor shapes match + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + BuildArmComputeTensor(m_KernelTensor, weightInfo); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::CLTensor* optionalBias = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBias = &m_BiasTensor; + } + + m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_pConvolutionLayer = std::make_unique<arm_compute::CLConvolutionLayer>(); + static_cast<arm_compute::CLConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + + BOOST_ASSERT(m_pConvolutionLayer); + + InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>()); + + if (optionalBias) + { + InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>()); + } +} + +void ClConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dFloat32Workload_Execute"); + BOOST_ASSERT(m_pConvolutionLayer); + + m_pConvolutionLayer->run(); +} + +} //namespace armnn
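The PadStrideInfo above is built with DimensionRoundingType::FLOOR. For one spatial dimension, that rounding convention gives the output extent below; this is standard convolution arithmetic stated for reference, not a call into ACL, and it assumes the padded input is at least as large as the kernel:

// Output extent for one spatial dimension with FLOOR rounding:
// out = floor((in + padBefore + padAfter - kernel) / stride) + 1
unsigned int ConvOutputSizeFloor(unsigned int in, unsigned int kernel, unsigned int stride,
                                 unsigned int padBefore, unsigned int padAfter)
{
    // Integer division truncates, which equals floor for these non-negative quantities.
    return (in + padBefore + padAfter - kernel) / stride + 1;
}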
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..29931056a8 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ +class ClConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor> +{ +public: + ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pConvolutionLayer; + + arm_compute::CLTensor m_KernelTensor; + arm_compute::CLTensor m_BiasTensor; +}; + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..a3c6ac9dca --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp @@ -0,0 +1,72 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClConvolution2dUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info) +{ + + // todo: check tensor shapes match + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + BuildArmComputeTensor(m_KernelTensor, weightInfo); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::CLTensor* optionalBias = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBias = &m_BiasTensor; + } + + m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + BOOST_ASSERT_MSG(IsClDirectConvolution2dSupported(weightInfo, m_Data.m_Parameters), + "Unsupported parameters for u8 convolution"); + + m_pConvolutionLayer = std::make_unique<arm_compute::CLDirectConvolutionLayer>(); + static_cast<arm_compute::CLDirectConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + BOOST_ASSERT(m_pConvolutionLayer); + + InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); + + if (optionalBias) + { + InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<int32_t>()); + } +} + +void ClConvolution2dUint8Workload::Execute() const +{ + 
ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dUint8Workload_Execute"); + BOOST_ASSERT(m_pConvolutionLayer); + + m_pConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..b2849d773b --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + + +namespace armnn +{ + +class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor> +{ +public: + ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pConvolutionLayer; + + arm_compute::CLTensor m_KernelTensor; + arm_compute::CLTensor m_BiasTensor; +}; + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp new file mode 100644 index 0000000000..f31c73bc60 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClDepthwiseConvolutionFloat32Workload.hpp" +#include "ClDepthwiseConvolutionHelper.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionFloat32Workload::ClDepthwiseConvolutionFloat32Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + InitClDepthwiseConvolutionWorkload(*this); +} + +void ClDepthwiseConvolutionFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionFloat32Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp new file mode 100644 index 0000000000..8711f0c515 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + ClDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; + +private: + typedef float KernelDataType; + typedef float BiasDataType; + + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + arm_compute::CLTensor m_KernelTensor; + arm_compute::CLTensor m_BiasTensor; + + template <typename WorkloadType> + friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload); +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp new file mode 100644 index 0000000000..cd7115773d --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <armnn/TypesUtils.hpp> +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClTensorHandle.hpp" + +namespace armnn +{ + +template <typename WorkloadType> +void InitClDepthwiseConvolutionWorkload(WorkloadType& workload) +{ + using T = typename WorkloadType::KernelDataType; + using B = typename WorkloadType::BiasDataType; + + auto& m_Data = workload.GetData(); + auto& m_KernelTensor = workload.m_KernelTensor; + auto& m_BiasTensor = workload.m_BiasTensor; + auto& m_pDepthwiseConvolutionLayer = workload.m_pDepthwiseConvolutionLayer; + + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + std::string reasonIfUnsupported; + if (!IsClDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + armcomputetensorutils::BuildArmComputeTensor(m_KernelTensor, weightInfo); + + arm_compute::CLTensor* optionalBias = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + armcomputetensorutils::BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBias = &m_BiasTensor; + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + std::string name = std::string("ClDepthwiseConvolution") + GetDataTypeName(GetDataType<T>()) + "Workload"; + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + //Check for optimisation opportunities. 
+ bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3); + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_pDepthwiseConvolutionLayer.get())->configure( + &input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>(); + static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_pDepthwiseConvolutionLayer.get())->configure( + &input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<T>()); + + if (optionalBias) + { + InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->template GetConstTensor<B>()); + } +} + +} //namespace armnn
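InitClDepthwiseConvolutionWorkload above switches to CLDepthwiseConvolutionLayer3x3 only when indices 2 and 3 of the weight shape are both 3. A small sketch of that dispatch decision, assuming (as the code does) that those indices hold the kernel height and width:

#include <array>

// Returns true when the 3x3-optimised depthwise path can be taken.
bool CanUse3x3DepthwisePath(const std::array<unsigned int, 4>& weightShape)
{
    const unsigned int kernelHeight = weightShape[2];
    const unsigned int kernelWidth  = weightShape[3];
    return kernelHeight == 3 && kernelWidth == 3;
}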
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..7e7c488c74 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClDepthwiseConvolutionUint8Workload.hpp" +#include "ClDepthwiseConvolutionHelper.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + + +ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + InitClDepthwiseConvolutionWorkload(*this); +} + +void ClDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..ee09ff3e58 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; + +private: + typedef uint8_t KernelDataType; + typedef int32_t BiasDataType; + + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + arm_compute::CLTensor m_KernelTensor; + arm_compute::CLTensor m_BiasTensor; + + template <typename WorkloadType> + friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload); +}; + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp new file mode 100644 index 0000000000..882da50855 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClFloorFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" + +namespace armnn +{ + +ClFloorFloat32Workload::ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload<FloorQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClFloorFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClFloorFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFloorFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp new file mode 100644 index 0000000000..532dd29884 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor> +{ +public: + ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp new file mode 100644 index 0000000000..96596b9d9c --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp @@ -0,0 +1,52 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClFullyConnectedFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClFullyConnectedFloat32Workload::ClFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info) +{ + + BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + arm_compute::CLTensor* optionalBiasTensor = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBiasTensor = &m_BiasesTensor; + } + + m_Data.ValidateInputsOutputs("ClFullyConnectedFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_FullyConnected.configure( + &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix); + + // Allocate + InitialiseArmComputeClTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>()); + + if (optionalBiasTensor) + { + InitialiseArmComputeClTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>()); + } +} + +void ClFullyConnectedFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFullyConnectedFloat32Workload_Execute"); + m_FullyConnected.run(); +} + +} //namespace armnn
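ClFullyConnectedFloat32Workload forwards m_TransposeWeightMatrix to CLFullyConnectedLayer and uploads the weights and optional bias. For reference, a naive scalar sketch of the fully connected computation itself, assuming weights stored as [outputs x inputs] and matching sizes (illustrative only, not the ACL kernel):

#include <vector>

// y[o] = sum_i W[o][i] * x[i] + b[o]
std::vector<float> FullyConnectedReference(const std::vector<std::vector<float>>& weights,
                                           const std::vector<float>& input,
                                           const std::vector<float>* bias)
{
    std::vector<float> output(weights.size(), 0.0f);
    for (size_t o = 0; o < weights.size(); ++o)
    {
        for (size_t i = 0; i < input.size(); ++i)
        {
            output[o] += weights[o][i] * input[i];
        }
        if (bias != nullptr)
        {
            output[o] += (*bias)[o];
        }
    }
    return output;
}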
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp new file mode 100644 index 0000000000..def20e0831 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + + +namespace armnn +{ + +class ClFullyConnectedFloat32Workload : public armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor> +{ +public: + ClFullyConnectedFloat32Workload(const armnn::FullyConnectedQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info); + + using armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor>::m_Data; + void Execute() const override; + +private: + mutable arm_compute::CLFullyConnectedLayer m_FullyConnected; + arm_compute::CLTensor m_WeightsTensor; + arm_compute::CLTensor m_BiasesTensor; +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..e15db74ec9 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClL2NormalizationFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClL2NormalizationFloat32Workload::ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClL2NormalizationFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void ClL2NormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClL2NormalizationFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + + + diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..848803e2f0 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor> +{ +public: + ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + // Purposely not a CLL2Normalize function. See constructor. 
+ mutable arm_compute::CLNormalizationLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp new file mode 100644 index 0000000000..4d2d708a0e --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClMergerFloat32Workload.hpp" + + +namespace armnn +{ + +void ClMergerFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerFloat32Workload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp new file mode 100644 index 0000000000..9808d30ccf --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerFloat32Workload : public ClBaseMergerWorkload<armnn::DataType::Float32> +{ +public: + using ClBaseMergerWorkload<armnn::DataType::Float32>::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp new file mode 100644 index 0000000000..94a1d3c593 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClMergerUint8Workload.hpp" + + +namespace armnn +{ + +void ClMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerUint8Workload_Execute"); + ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp new file mode 100644 index 0000000000..1ddbb2ac52 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8> +{ +public: + using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp new file mode 100644 index 0000000000..405d109aa1 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClMultiplicationFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +ClMultiplicationFloat32Workload::ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClMultiplicationFloat32Workload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_PixelWiseMultiplication.configure(&input0, + &input1, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_NEAREST_EVEN); +} + +void ClMultiplicationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMultiplicationFloat32Workload_Execute"); + + // Execute the layer + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp new file mode 100644 index 0000000000..8e387118e8 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ +class ClMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor> +{ +public: + ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using Float32Workload<MultiplicationQueueDescriptor>::Float32Workload; + void Execute() const override; + +private: + mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..a163ec2883 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClNormalizationFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo layerInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +ClNormalizationFloat32Workload::ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<NormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClNormalizationFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +}; + +void ClNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClNormalizationFloat32Workload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn
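ClNormalizationWorkloadValidate exposes ACL's static validate() check so callers can test support without constructing a workload. A hedged sketch of such a caller; the wrapper function itself is hypothetical, only the validate call from the code above and the arm_compute::Status accessors are assumed:

#include "backends/ClWorkloads/ClNormalizationFloat32Workload.hpp"

// Hypothetical helper: ask ACL whether this configuration is supported before building the workload.
bool IsNormalizationSupportedOnCl(const armnn::TensorInfo& input,
                                  const armnn::TensorInfo& output,
                                  const armnn::NormalizationDescriptor& descriptor)
{
    const arm_compute::Status status = armnn::ClNormalizationWorkloadValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}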
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..cbd5fa92a9 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class ClNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor> +{ +public: + ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp new file mode 100644 index 0000000000..3147e95b2e --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClPermuteWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include <arm_compute/core/Error.h> + +namespace armnn +{ + +arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor) +{ + const armnn::PermutationVector& perm = descriptor.m_DimMappings; + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U }) + && !perm.IsEqual({ 0U, 2U, 3U, 1U }) + && !perm.IsEqual({ 3U, 2U, 0U, 1U }), + "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported"); + + return arm_compute::Status{}; +} + +template <armnn::DataType DataType> +ClPermuteWorkload<DataType>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Run the layer + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType DataType> +void ClPermuteWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class ClPermuteWorkload<DataType::Float32>; +template class ClPermuteWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp new file mode 100644 index 0000000000..430c59524e --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include <armnn/TypesUtils.hpp> +#include <arm_compute/runtime/CL/functions/CLPermute.h> + +#include <string> + +namespace armnn +{ + +arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor); + +template <armnn::DataType DataType> +class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType> +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("ClPermute") + GetDataTypeName(DataType) + "Workload"; + return name; + } + + ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data; + mutable arm_compute::CLPermute m_PermuteFunction; +}; + +using ClPermuteFloat32Workload = ClPermuteWorkload<DataType::Float32>; +using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..dbdc06f174 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClPooling2dBaseWorkload.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template <armnn::DataType dataType> +ClPooling2dBaseWorkload<dataType>::ClPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + // Run the layer + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class ClPooling2dBaseWorkload<DataType::Float32>; +template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>; + +} diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..828f000505 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types +template <armnn::DataType dataType> +class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType> +{ +public: + using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data; + + ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::CLPoolingLayer m_PoolingLayer; +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp new file mode 100644 index 0000000000..a7f5855b8a --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClPooling2dFloat32Workload.hpp" + +namespace armnn +{ + +ClPooling2dFloat32Workload::ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload<DataType::Float32>(descriptor, info, "ClPooling2dFloat32Workload") +{ +} + +void ClPooling2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dFloat32Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp new file mode 100644 index 0000000000..3456a2cff8 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" +#include "backends/ClWorkloads//ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ +class ClPooling2dFloat32Workload : public ClPooling2dBaseWorkload<DataType::Float32> +{ +public: + ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..2d2109e252 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClPooling2dUint8Workload.hpp" + +namespace armnn +{ + +ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "ClPooling2dUint8Workload") +{ +} + +void ClPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..0875c7486c --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" +#include "backends/ClWorkloads/ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload<DataType::QuantisedAsymm8> +{ +public: + ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp new file mode 100644 index 0000000000..7b4ad4415b --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClReshapeFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +ClReshapeFloat32Workload::ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClReshapeFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp new file mode 100644 index 0000000000..e344ee08ad --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor> +{ +public: + ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp new file mode 100644 index 0000000000..36cc1dec17 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClReshapeUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ +ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output); +} + +void ClReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeUint8Workload_Execute"); + + m_Layer.run(); +} + +} //namespace armnn
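Both reshape workloads above simply hand the input and output tensors to CLReshapeLayer; a reshape is only meaningful when the two shapes describe the same number of elements. A trivial illustrative check of that invariant (not something the workload performs itself):

#include <vector>

// A reshape is only well-formed when the total element count is unchanged.
bool IsValidReshape(const std::vector<unsigned int>& inputShape,
                    const std::vector<unsigned int>& outputShape)
{
    auto numElements = [](const std::vector<unsigned int>& shape)
    {
        unsigned long long count = 1;
        for (unsigned int dim : shape)
        {
            count *= dim;
        }
        return count;
    };
    return numElements(inputShape) == numElements(outputShape);
}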
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp new file mode 100644 index 0000000000..9e4199098c --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +// Reshape +class ClReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> +{ +public: + ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp new file mode 100644 index 0000000000..d71011a2e3 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClResizeBilinearFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" + +namespace armnn +{ + +ClResizeBilinearFloat32Workload::ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<ResizeBilinearQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClResizeBilinearFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, + arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), + arm_compute::SamplingPolicy::TOP_LEFT); +}; + +void ClResizeBilinearFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClResizeBilinearFloat32Workload_Execute"); + m_ResizeBilinearLayer.run(); +} + + +} //namespace armnn
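ClResizeBilinearFloat32Workload configures CLScale with InterpolationPolicy::BILINEAR and SamplingPolicy::TOP_LEFT. As a reference for what bilinear interpolation computes at one output location, a scalar sketch of the generic formula (it does not reproduce CLScale's exact coordinate and sampling-policy arithmetic):

// Interpolates between four neighbouring values given fractional offsets fx, fy in [0, 1].
float BilinearReference(float topLeft, float topRight, float bottomLeft, float bottomRight,
                        float fx, float fy)
{
    const float top    = topLeft + fx * (topRight - topLeft);
    const float bottom = bottomLeft + fx * (bottomRight - bottomLeft);
    return top + fy * (bottom - top);
}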
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp new file mode 100644 index 0000000000..5f70e71619 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp @@ -0,0 +1,23 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor> +{ +public: + ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLScale m_ResizeBilinearLayer; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp new file mode 100644 index 0000000000..257e76a4df --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClSoftmaxFloat32Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void ClSoftmaxFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxFloat32Workload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn
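The Float32 softmax workload passes m_Beta straight to CLSoftmaxLayer. For reference, a naive scalar sketch of softmax with a beta scaling, stabilised with the usual max-subtraction trick (illustrative only; assumes a non-empty input vector):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

// softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j)
std::vector<float> SoftmaxReference(const std::vector<float>& x, float beta)
{
    assert(!x.empty());
    std::vector<float> result(x.size());
    const float maxValue = *std::max_element(x.begin(), x.end());

    float sum = 0.0f;
    for (size_t i = 0; i < x.size(); ++i)
    {
        // Subtracting the maximum does not change the result but avoids overflow in exp().
        result[i] = std::exp(beta * (x[i] - maxValue));
        sum += result[i];
    }
    for (float& value : result)
    {
        value /= sum;
    }
    return result;
}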
\ No newline at end of file diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp new file mode 100644 index 0000000000..a26bbe851d --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +class ClSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor> +{ +public: + ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + + + diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..9e856fea94 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClSoftmaxUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void ClSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..07ee6256d8 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ +// Softmax +class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> +{ +public: + ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; +private: + + mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp new file mode 100644 index 0000000000..6221d56766 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClSplitterFloat32Workload.hpp" + +namespace armnn +{ + +void ClSplitterFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterFloat32Workload_Execute"); + ClBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp new file mode 100644 index 0000000000..cfc7eaa3c2 --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "ClBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class ClSplitterFloat32Workload : public ClBaseSplitterWorkload<DataType::Float32> +{ +public: + using ClBaseSplitterWorkload<DataType::Float32>::ClBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp new file mode 100644 index 0000000000..3aa470894c --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ClSplitterUint8Workload.hpp" + +namespace armnn +{ + +void ClSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterUint8Workload_Execute"); + ClBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp new file mode 100644 index 0000000000..ed8b3cc69e --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "ClBaseSplitterWorkload.hpp" + +namespace armnn +{ +class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8> +{ +public: + using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload; + virtual void Execute() const override; +}; +} //namespace armnn + + + diff --git a/src/armnn/backends/CpuTensorHandle.cpp b/src/armnn/backends/CpuTensorHandle.cpp new file mode 100644 index 0000000000..dd8176c9ec --- /dev/null +++ b/src/armnn/backends/CpuTensorHandle.cpp @@ -0,0 +1,107 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "armnn/Exceptions.hpp" +#include "CpuTensorHandle.hpp" + +#include <cstring> + +namespace armnn +{ + +ConstCpuTensorHandle::ConstCpuTensorHandle(const TensorInfo& tensorInfo) +: m_TensorInfo(tensorInfo) +, m_Memory(nullptr) +{ +} + +template <> +const void* ConstCpuTensorHandle::GetConstTensor() const +{ + return m_Memory; +} + +CpuTensorHandle::CpuTensorHandle(const TensorInfo& tensorInfo) +: ConstCpuTensorHandle(tensorInfo) +, m_MutableMemory(nullptr) +{ +} + +template <> +void* CpuTensorHandle::GetTensor() const +{ + return m_MutableMemory; +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const TensorInfo& tensorInfo) +: CpuTensorHandle(tensorInfo) +{ +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ConstTensor& tensor) +: ScopedCpuTensorHandle(tensor.GetInfo()) +{ + CopyFrom(tensor.GetMemoryArea(), tensor.GetNumBytes()); +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ScopedCpuTensorHandle& other) +: CpuTensorHandle(other.GetTensorInfo()) +{ + CopyFrom(other); +} + +ScopedCpuTensorHandle& ScopedCpuTensorHandle::operator=(const ScopedCpuTensorHandle& other) +{ + ::operator delete(GetTensor<void>()); + SetMemory(nullptr); + CopyFrom(other); + return *this; +} + +ScopedCpuTensorHandle::~ScopedCpuTensorHandle() +{ + ::operator delete(GetTensor<void>()); +} + +void ScopedCpuTensorHandle::Allocate() +{ + if (GetTensor<void>() == nullptr) + { + SetMemory(::operator new(GetTensorInfo().GetNumBytes())); + } + else + { + throw InvalidArgumentException("CpuTensorHandle::Allocate Trying to allocate a CpuTensorHandle" + "that already has allocated memory."); + } +} + +void ScopedCpuTensorHandle::CopyFrom(const ScopedCpuTensorHandle& other) +{ + CopyFrom(other.GetTensor<void>(), other.GetTensorInfo().GetNumBytes()); +} + +void ScopedCpuTensorHandle::CopyFrom(const void* srcMemory, unsigned int numBytes) +{ + BOOST_ASSERT(GetTensor<void>() == nullptr); + BOOST_ASSERT(GetTensorInfo().GetNumBytes() == numBytes); + + if (srcMemory) + { + Allocate(); + memcpy(GetTensor<void>(), srcMemory, numBytes); + } +} + +void PassthroughCpuTensorHandle::Allocate() +{ + throw InvalidArgumentException("PassthroughCpuTensorHandle::Allocate() should never be called"); +} + +void ConstPassthroughCpuTensorHandle::Allocate() +{ + throw InvalidArgumentException("ConstPassthroughCpuTensorHandle::Allocate() should never be called"); +} + +} // namespace armnn diff --git a/src/armnn/backends/CpuTensorHandle.hpp b/src/armnn/backends/CpuTensorHandle.hpp new file mode 100644 index 0000000000..4bf4439083 --- /dev/null +++ b/src/armnn/backends/CpuTensorHandle.hpp @@ -0,0 +1,142 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once +#include "CpuTensorHandleFwd.hpp" + +#include "armnn/TypesUtils.hpp" + +#include "OutputHandler.hpp" + +namespace armnn +{ + +// Abstract tensor handle wrapping a CPU-readable region of memory, interpreting it as tensor data. 
+class ConstCpuTensorHandle : public ITensorHandle +{ +public: + template <typename T> + const T* GetConstTensor() const + { + BOOST_ASSERT(GetTensorInfo().GetDataType() == GetDataType<T>()); + return reinterpret_cast<const T*>(m_Memory); + } + + const TensorInfo& GetTensorInfo() const + { + return m_TensorInfo; + } + + virtual ITensorHandle::Type GetType() const override + { + return ITensorHandle::Cpu; + } + +protected: + ConstCpuTensorHandle(const TensorInfo& tensorInfo); + + void SetConstMemory(const void* mem) { m_Memory = mem; } + +private: + ConstCpuTensorHandle(const ConstCpuTensorHandle& other) = delete; + ConstCpuTensorHandle& operator=(const ConstCpuTensorHandle& other) = delete; + + TensorInfo m_TensorInfo; + const void* m_Memory; +}; + +// Abstract specialization of ConstCpuTensorHandle that allows write access to the same data +class CpuTensorHandle : public ConstCpuTensorHandle +{ +public: + template <typename T> + T* GetTensor() const + { + BOOST_ASSERT(GetTensorInfo().GetDataType() == GetDataType<T>()); + return reinterpret_cast<T*>(m_MutableMemory); + } + +protected: + CpuTensorHandle(const TensorInfo& tensorInfo); + + void SetMemory(void* mem) + { + m_MutableMemory = mem; + SetConstMemory(m_MutableMemory); + } + +private: + + CpuTensorHandle(const CpuTensorHandle& other) = delete; + CpuTensorHandle& operator=(const CpuTensorHandle& other) = delete; + void* m_MutableMemory; +}; + +// A CpuTensorHandle that owns the wrapped memory region. +class ScopedCpuTensorHandle : public CpuTensorHandle +{ +public: + explicit ScopedCpuTensorHandle(const TensorInfo& tensorInfo); + + // Copies contents from Tensor + explicit ScopedCpuTensorHandle(const ConstTensor& tensor); + + ScopedCpuTensorHandle(const ScopedCpuTensorHandle& other); + ScopedCpuTensorHandle& operator=(const ScopedCpuTensorHandle& other); + ~ScopedCpuTensorHandle(); + + virtual void Allocate() override; + +private: + void CopyFrom(const ScopedCpuTensorHandle& other); + void CopyFrom(const void* srcMemory, unsigned int numBytes); +}; + +// A CpuTensorHandle that wraps an already allocated memory region. +// +// Clients must make sure the passed in memory region stays alive for the lifetime of +// the PassthroughCpuTensorHandle instance. +// +// Note there is no polymorphism to/from ConstPassthroughCpuTensorHandle +class PassthroughCpuTensorHandle : public CpuTensorHandle +{ +public: + PassthroughCpuTensorHandle(const TensorInfo& tensorInfo, void* mem) + : CpuTensorHandle(tensorInfo) + { + SetMemory(mem); + } + + virtual void Allocate() override; +}; + +// A ConstCpuTensorHandle that wraps an already allocated memory region. +// +// This allows users to pass in const memory to a network. +// Clients must make sure the passed in memory region stays alive for the lifetime of +// the PassthroughCpuTensorHandle instance. 
+// +// Note there is no polymorphism to/from PassthroughCpuTensorHandle +class ConstPassthroughCpuTensorHandle : public ConstCpuTensorHandle +{ +public: + ConstPassthroughCpuTensorHandle(const TensorInfo& tensorInfo, const void* mem) + : ConstCpuTensorHandle(tensorInfo) + { + SetConstMemory(mem); + } + + virtual void Allocate() override; +}; + + +// template specializations + +template <> +const void* ConstCpuTensorHandle::GetConstTensor() const; + +template <> +void* CpuTensorHandle::GetTensor() const; + +} // namespace armnn diff --git a/src/armnn/backends/CpuTensorHandleFwd.hpp b/src/armnn/backends/CpuTensorHandleFwd.hpp new file mode 100644 index 0000000000..93e9a7948c --- /dev/null +++ b/src/armnn/backends/CpuTensorHandleFwd.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +namespace armnn +{ + +class ConstCpuTensorHandle; +class CpuTensorHandle; +class ScopedCpuTensorHandle; +class PassthroughCpuTensorHandle; +class ConstPassthroughCpuTensorHandle; + +} // namespace armnn diff --git a/src/armnn/backends/ITensorHandle.hpp b/src/armnn/backends/ITensorHandle.hpp new file mode 100644 index 0000000000..b95dcc65e0 --- /dev/null +++ b/src/armnn/backends/ITensorHandle.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +namespace armnn +{ + +class ITensorHandle +{ +public: + enum Type + { + Cpu, + CL, + Neon + }; + + virtual ~ITensorHandle(){} + virtual void Allocate() = 0; + virtual ITensorHandle::Type GetType() const = 0; +}; + +} diff --git a/src/armnn/backends/MakeWorkloadHelper.hpp b/src/armnn/backends/MakeWorkloadHelper.hpp new file mode 100644 index 0000000000..a8729eb07c --- /dev/null +++ b/src/armnn/backends/MakeWorkloadHelper.hpp @@ -0,0 +1,59 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +namespace armnn +{ +namespace +{ + +// Make a workload of the specified WorkloadType +template<typename WorkloadType> +struct MakeWorkloadForType +{ + template<typename QueueDescriptorType> + static std::unique_ptr<WorkloadType> Func(const QueueDescriptorType& descriptor, const WorkloadInfo& info) + { + return std::make_unique<WorkloadType>(descriptor, info); + } +}; + +// Specialization for void workload type used for unsupported workloads. +template<> +struct MakeWorkloadForType<NullWorkload> +{ + template<typename QueueDescriptorType> + static std::unique_ptr<NullWorkload> Func(const QueueDescriptorType& descriptor, const WorkloadInfo& info) + { + return nullptr; + } +}; + +// Makes a workload for one the specified types based on the data type requirements of the tensorinfo. +// Specify type void as the WorkloadType for unsupported DataType/WorkloadType combos. +template <typename Float32Workload, typename Uint8Workload, typename QueueDescriptorType> +std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) +{ + const DataType dataType = !info.m_InputTensorInfos.empty() ? 
+ info.m_InputTensorInfos[0].GetDataType() + : info.m_OutputTensorInfos[0].GetDataType(); + + BOOST_ASSERT(info.m_InputTensorInfos.empty() || info.m_OutputTensorInfos.empty() + || info.m_InputTensorInfos[0].GetDataType() == info.m_OutputTensorInfos[0].GetDataType()); + + switch (dataType) + { + case DataType::Float32: + return MakeWorkloadForType<Float32Workload>::Func(descriptor, info); + case DataType::QuantisedAsymm8: + return MakeWorkloadForType<Uint8Workload>::Func(descriptor, info); + default: + BOOST_ASSERT_MSG(false, "Unknown DataType."); + return nullptr; + } +} + +} //namespace +} //namespace armnn
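[Editor's note] MakeWorkload selects between the Float32 and Uint8 workload types from the tensor data type and falls back to NullWorkload, i.e. a nullptr result, for combinations a backend does not implement. A stripped-down sketch of the same dispatch pattern, using hypothetical stand-in types rather than the real ArmNN workloads:

    #include <memory>

    // Hypothetical stand-ins used only to illustrate the dispatch pattern
    // from MakeWorkloadHelper.hpp.
    struct IWorkloadStub { virtual ~IWorkloadStub() = default; };
    struct Float32WorkloadStub : IWorkloadStub {};
    struct NullWorkloadStub;  // never instantiated; marks unsupported combinations

    template <typename WorkloadType>
    struct MakeForType
    {
        static std::unique_ptr<IWorkloadStub> Func() { return std::make_unique<WorkloadType>(); }
    };

    // Specialisation for the "unsupported" marker type: always returns nullptr.
    template <>
    struct MakeForType<NullWorkloadStub>
    {
        static std::unique_ptr<IWorkloadStub> Func() { return nullptr; }
    };

    enum class StubDataType { Float32, QuantisedAsymm8 };

    template <typename Float32Workload, typename Uint8Workload>
    std::unique_ptr<IWorkloadStub> MakeStubWorkload(StubDataType dataType)
    {
        switch (dataType)
        {
            case StubDataType::Float32:         return MakeForType<Float32Workload>::Func();
            case StubDataType::QuantisedAsymm8: return MakeForType<Uint8Workload>::Func();
            default:                            return nullptr;
        }
    }

    // Usage: a backend with only a float path returns nullptr for uint8 requests.
    // auto w = MakeStubWorkload<Float32WorkloadStub, NullWorkloadStub>(StubDataType::QuantisedAsymm8);

The factories below rely on this: passing NullWorkload as one of the template arguments is how a backend declares a data type unsupported for that layer.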
\ No newline at end of file diff --git a/src/armnn/backends/MemCopyWorkload.cpp b/src/armnn/backends/MemCopyWorkload.cpp new file mode 100644 index 0000000000..09ffd9a08a --- /dev/null +++ b/src/armnn/backends/MemCopyWorkload.cpp @@ -0,0 +1,256 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "MemCopyWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" + +#if ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#endif + +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonTensorHandle.hpp" +#endif + +#include <cstring> +#include <boost/cast.hpp> + +namespace armnn +{ + +namespace +{ + +template <typename SrcTensorHandleType, typename DstTensorHandleType> +void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, + std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs) +{ + const unsigned int numInputs = boost::numeric_cast<unsigned int>(descriptor.m_Inputs.size()); + tensorHandlePairs.reserve(numInputs); + + for (unsigned int i = 0; i < numInputs; ++i) + { + SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast<SrcTensorHandleType*>( + descriptor.m_Inputs[i]); + DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast<DstTensorHandleType*>( + descriptor.m_Outputs[i]); + + tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); + } +} + +void CopyFromCpuToCpu(const ConstCpuTensorHandle& srcHandle, CpuTensorHandle& dstHandle) +{ + const unsigned int numBytes = srcHandle.GetTensorInfo().GetNumBytes(); + const void* const input = srcHandle.GetConstTensor<void>(); + void* const output = dstHandle.GetTensor<void>(); + std::memcpy(output, input, numBytes); +} + +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED + +#include "backends/ArmComputeTensorUtils.hpp" + +template <armnn::DataType DataType> +void CopyFromCpuToAclBackend(const ConstCpuTensorHandle& srcHandle, arm_compute::ITensor& dstAclTensor) +{ + using T = ResolveType<DataType>; + armnn::armcomputetensorutils::CopyArmComputeITensorData(srcHandle.GetConstTensor<T>(), dstAclTensor); +} + +template <armnn::DataType DataType> +void CopyFromAclBackendToCpu(const arm_compute::ITensor& srcAclTensor, CpuTensorHandle& dstHandle) +{ + using T = ResolveType<DataType>; + armnn::armcomputetensorutils::CopyArmComputeITensorData(srcAclTensor, dstHandle.GetTensor<T>()); +} + +#endif // ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED + +} + +template <armnn::DataType DataType> +CopyFromCpuToCpuWorkload<DataType>::CopyFromCpuToCpuWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromCpuToCpuWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "CopyFromCpuToCpuWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + CopyFromCpuToCpu(*pair.first, *pair.second); + } +} + +template class CopyFromCpuToCpuWorkload<DataType::Float32>; +template class CopyFromCpuToCpuWorkload<DataType::QuantisedAsymm8>; + +#if ARMCOMPUTECL_ENABLED + +template <armnn::DataType DataType> +CopyFromCpuToClWorkload<DataType>::CopyFromCpuToClWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, 
m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromCpuToClWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromCpuToClWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.second; + + handle.Map(true); + CopyFromCpuToAclBackend<DataType>(*pair.first, handle.GetTensor()); + handle.UnMap(); + } +} + +template class CopyFromCpuToClWorkload<DataType::Float32>; +template class CopyFromCpuToClWorkload<DataType::QuantisedAsymm8>; + + +template <armnn::DataType DataType> +CopyFromClToCpuWorkload<DataType>::CopyFromClToCpuWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromClToCpuWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToCpuWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.first; + + handle.Map(true); + CopyFromAclBackendToCpu<DataType>(handle.GetTensor(), *pair.second); + handle.UnMap(); + } +} + +template class CopyFromClToCpuWorkload<DataType::Float32>; +template class CopyFromClToCpuWorkload<DataType::QuantisedAsymm8>; + +#endif // ARMCOMPUTECL_ENABLED + +#if ARMCOMPUTENEON_ENABLED + +template <armnn::DataType DataType> +CopyFromCpuToNeonWorkload<DataType>::CopyFromCpuToNeonWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromCpuToNeonWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromCpuToNeonWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + CopyFromCpuToAclBackend<DataType>(*pair.first, pair.second->GetTensor()); + } +} + +template class CopyFromCpuToNeonWorkload<DataType::Float32>; +template class CopyFromCpuToNeonWorkload<DataType::QuantisedAsymm8>; + +template <armnn::DataType DataType> +CopyFromNeonToCpuWorkload<DataType>::CopyFromNeonToCpuWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromNeonToCpuWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "CopyFromNeonToCpuWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + CopyFromAclBackendToCpu<DataType>(pair.first->GetTensor(), *pair.second); + } +} + +template class CopyFromNeonToCpuWorkload<DataType::Float32>; +template class CopyFromNeonToCpuWorkload<DataType::QuantisedAsymm8>; + +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +template <armnn::DataType DataType> +CopyFromNeonToClWorkload<DataType>::CopyFromNeonToClWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromNeonToClWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, 
"CopyFromNeonToClWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.second; + + handle.Map(true); + handle.GetTensor().copy_from(pair.first->GetTensor()); + handle.UnMap(); + } +} + +template class CopyFromNeonToClWorkload<DataType::Float32>; +template class CopyFromNeonToClWorkload<DataType::QuantisedAsymm8>; + +template <armnn::DataType DataType> +CopyFromClToNeonWorkload<DataType>::CopyFromClToNeonWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<MemCopyQueueDescriptor, DataType>(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +template <armnn::DataType DataType> +void CopyFromClToNeonWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "CopyFromClToNeonWorkload_Execute"); + + for (const auto& pair : m_TensorHandlePairs) + { + IClTensorHandle& handle = *pair.first; + + handle.Map(true); + pair.second->GetTensor().copy_from(handle.GetTensor()); + handle.UnMap(); + } +} + +template class CopyFromClToNeonWorkload<DataType::Float32>; +template class CopyFromClToNeonWorkload<DataType::QuantisedAsymm8>; + +#endif // ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +} diff --git a/src/armnn/backends/MemCopyWorkload.hpp b/src/armnn/backends/MemCopyWorkload.hpp new file mode 100644 index 0000000000..7fcaf138c3 --- /dev/null +++ b/src/armnn/backends/MemCopyWorkload.hpp @@ -0,0 +1,136 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "CpuTensorHandleFwd.hpp" +#include "backends/Workload.hpp" + +#include <utility> + +namespace armnn +{ + +template <armnn::DataType DataType> +class CopyFromCpuToCpuWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromCpuToCpuWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair<const ConstCpuTensorHandle*, CpuTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromCpuToCpuFloat32Workload = CopyFromCpuToCpuWorkload<DataType::Float32>; +using CopyFromCpuToCpuUint8Workload = CopyFromCpuToCpuWorkload<DataType::QuantisedAsymm8>; + +#if ARMCOMPUTECL_ENABLED + +class IClTensorHandle; + +template <armnn::DataType DataType> +class CopyFromCpuToClWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromCpuToClWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair<const ConstCpuTensorHandle*, IClTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromCpuToClFloat32Workload = CopyFromCpuToClWorkload<DataType::Float32>; +using CopyFromCpuToClUint8Workload = CopyFromCpuToClWorkload<DataType::QuantisedAsymm8>; + +template <armnn::DataType DataType> +class CopyFromClToCpuWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromClToCpuWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair<IClTensorHandle*, CpuTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromClToCpuFloat32Workload = CopyFromClToCpuWorkload<DataType::Float32>; +using CopyFromClToCpuUint8Workload = 
CopyFromClToCpuWorkload<DataType::QuantisedAsymm8>; + +#endif // ARMCOMPUTECL_ENABLED + +#if ARMCOMPUTENEON_ENABLED + +class INeonTensorHandle; + +template <armnn::DataType DataType> +class CopyFromCpuToNeonWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromCpuToNeonWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +protected: + using TensorHandlePair = std::pair<const ConstCpuTensorHandle*, INeonTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromCpuToNeonFloat32Workload = CopyFromCpuToNeonWorkload<DataType::Float32>; +using CopyFromCpuToNeonUint8Workload = CopyFromCpuToNeonWorkload<DataType::QuantisedAsymm8>; + +template <armnn::DataType DataType> +class CopyFromNeonToCpuWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromNeonToCpuWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +protected: + using TensorHandlePair = std::pair<const INeonTensorHandle*, CpuTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromNeonToCpuFloat32Workload = CopyFromNeonToCpuWorkload<DataType::Float32>; +using CopyFromNeonToCpuUint8Workload = CopyFromNeonToCpuWorkload<DataType::QuantisedAsymm8>; + +#endif + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +template <armnn::DataType DataType> +class CopyFromNeonToClWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromNeonToClWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair<const INeonTensorHandle*, IClTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromNeonToClFloat32Workload = CopyFromNeonToClWorkload<DataType::Float32>; +using CopyFromNeonToClUint8Workload = CopyFromNeonToClWorkload<DataType::QuantisedAsymm8>; + +template <armnn::DataType DataType> +class CopyFromClToNeonWorkload : public TypedWorkload<MemCopyQueueDescriptor, DataType> +{ +public: + CopyFromClToNeonWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair<IClTensorHandle*, INeonTensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +using CopyFromClToNeonFloat32Workload = CopyFromClToNeonWorkload<DataType::Float32>; +using CopyFromClToNeonUint8Workload = CopyFromClToNeonWorkload<DataType::QuantisedAsymm8>; + +#endif + +} diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp new file mode 100644 index 0000000000..382b15e277 --- /dev/null +++ b/src/armnn/backends/NeonLayerSupport.cpp @@ -0,0 +1,398 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonLayerSupport.hpp" + +#include "LayerSupportCommon.hpp" +#include "InternalTypes.hpp" + +#include <armnn/Descriptors.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +#include <boost/core/ignore_unused.hpp> + +#ifdef ARMCOMPUTENEON_ENABLED +#include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp" +#include "NeonWorkloads/NeonPermuteWorkload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ +bool IsNeonActivationUint8Supported(std::string* reasonIfUnsupported, const ActivationDescriptor& parameters) +{ + if (parameters.m_Function != ActivationFunction::BoundedReLu) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Unsupported activation function, only BoundedReLu is supported)"; + } + + return false; + } + + return true; +} + +bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases, + // and complement with NEDirectConvolutionLayerKernel::configure() implementation + + // Only 1x1 is using direct convolution. Performance results and details are in: + // https://jira.arm.com/browse/IVGCVSW-1003 + // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc + + const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32); + + // Strides: 1|2|3 + const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) && + (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3); + + auto paddingLargerThan = [](const Convolution2dDescriptor& desc, unsigned int value) + { + return desc.m_PadLeft > value || desc.m_PadRight > value || desc.m_PadTop > value || desc.m_PadBottom > value; + }; + + // Supported sizes and padding + const bool sizeAndPaddingSupported = + // Pad > 0 not supported for 1x1 weights + (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u)); + + const bool preferDirectConvolution = dataTypeSupported && + strideSupported && + sizeAndPaddingSupported && + // NEDirectConvolutionLayerKernel doesn't support NULL bias + desc.m_BiasEnabled; + return preferDirectConvolution; +} + +bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters) +{ + if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Unsupported normalisation method type, only LocalBrightness is supported"; + } + return false; + } + if (parameters.m_NormSize % 2 == 0) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Normalization size must be an odd number."; + } + return false; + } + + return true; +} + +bool IsNeonBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTENEON_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without NEON support"; + } + return false; +#endif +} + +template<typename Float32Func, typename Uint8Func, typename ... Params> +bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported, + DataType dataType, + Float32Func floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... 
params) +{ + return IsNeonBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + uint8FuncPtr, + std::forward<Params>(params)...); +} + +#if ARMCOMPUTENEON_ENABLED +template<class FuncType, class... Args> +inline bool IsWorkloadSupported(FuncType& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward<Args>(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsNeonBackendSupported(reasonIfUnsupported); +#endif + +bool IsActivationSupportedNeon(const TensorInfo& input, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<const ActivationDescriptor&>, + &IsNeonActivationUint8Supported, + descriptor); +} + +bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights) +{ + ignore_unused(weights); + + if (parameters.m_StrideX < 1 || parameters.m_StrideX > 3) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "m_StrideX can only be 1, 2 or 3"; + } + return false; + } + + // weights.GetShape()[0] = channel multiplier + if (weights.GetShape()[0] != 1) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Channel multiplier only supports the value 1 in the NEON backend"; + } + return false; + } + + if (parameters.m_PadLeft != parameters.m_PadRight || parameters.m_PadTop != parameters.m_PadBottom) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Asymmetric padding for depthwise convolution currently not supported " + "in Neon backend"; + } + return false; + } + + return true; +} + +bool IsAdditionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsConstantSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConvolution2dSupportedNeon(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported) +{ 
+ return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &IsNeonDepthwiseConvolution2dDescParamsSupported, + &IsNeonDepthwiseConvolution2dDescParamsSupported, + descriptor, + weights); +} + +bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsInputSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFunc<>); +} + +bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsMultiplicationSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &IsNeonNormalizationDescParamsSupported, + &FalseFuncU8<const NormalizationDescriptor&>, + descriptor); +} + +bool IsOutputSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsPooling2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return false; +} + +bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSplitterSupportedNeon(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return 
false; +} + +bool IsReshapeSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFloorSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +} diff --git a/src/armnn/backends/NeonLayerSupport.hpp b/src/armnn/backends/NeonLayerSupport.hpp new file mode 100644 index 0000000000..b2ac49ae0d --- /dev/null +++ b/src/armnn/backends/NeonLayerSupport.hpp @@ -0,0 +1,109 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +bool IsNeonActivationUint8Supported(std::string* reasonIfUnsupported, const ActivationDescriptor& parameters); + +bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); + +bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, + const NormalizationDescriptor& parameters); + +bool IsActivationSupportedNeon(const TensorInfo& input, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported); + +bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsAdditionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedNeon(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = 
nullptr); + +bool IsPooling2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedNeon(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/armnn/backends/NeonTensorHandle.hpp b/src/armnn/backends/NeonTensorHandle.hpp new file mode 100644 index 0000000000..684a5e1bfc --- /dev/null +++ b/src/armnn/backends/NeonTensorHandle.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "OutputHandler.hpp" +#include "ArmComputeTensorUtils.hpp" + +#include <arm_compute/runtime/Tensor.h> +#include <arm_compute/runtime/SubTensor.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Coordinates.h> + + +namespace armnn +{ + +class INeonTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ITensor& GetTensor() = 0; + virtual arm_compute::ITensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; +}; + +class NeonTensorHandle : public INeonTensorHandle +{ +public: + NeonTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override + { + armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); + }; + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + +private: + arm_compute::Tensor m_Tensor; +}; + +class NeonSubTensorHandle : public INeonTensorHandle +{ +public: + NeonSubTensorHandle(arm_compute::ITensor& parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent, shape, coords) + { + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override + { + }; + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + +private: + arm_compute::SubTensor m_Tensor; +}; + +} diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp new file mode 100644 index 0000000000..384284114f --- /dev/null +++ b/src/armnn/backends/NeonWorkloadFactory.cpp @@ -0,0 +1,360 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// +#include "NeonWorkloadFactory.hpp" +#include "armnn/Utils.hpp" +#include "CpuTensorHandle.hpp" +#include "Layer.hpp" +#include "Layers.hpp" + +#ifdef ARMCOMPUTENEON_ENABLED +#include "MemCopyWorkload.hpp" +#include "NeonTensorHandle.hpp" +#include "NeonWorkloadUtils.hpp" +#include "NeonWorkloads.hpp" +#endif + +#include "MakeWorkloadHelper.hpp" + +#include <boost/polymorphic_cast.hpp> + +namespace armnn +{ + +bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTENEON_ENABLED + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon); + + const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + arm_compute::Coordinates coords; + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // arm compute indexes tensor coords in reverse order + unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); + } + + return std::make_unique<NeonSubTensorHandle>(boost::polymorphic_downcast<INeonTensorHandle*>(&parent)->GetTensor(), + shape, coords); +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return std::make_unique<NeonTensorHandle>(tensorInfo); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyFromNeonToCpuFloat32Workload, CopyFromNeonToCpuUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonActivationFloat32Workload, NeonActivationUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonSoftmaxFloat32Workload, NeonSoftmaxUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonSplitterFloat32Workload, NeonSplitterUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonMergerFloat32Workload, NeonMergerUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonFullyConnectedFloat32Workload, NullWorkload>(descriptor, info); +} + 
+std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonPermuteFloat32Workload, NeonPermuteUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonPooling2dFloat32Workload, NeonPooling2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonConvolution2dFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonDepthwiseConvolutionFloat32Workload, NeonDepthwiseConvolutionUint8Workload>( + descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization( + const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonNormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonAdditionFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonMultiplicationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonBatchNormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload"); + } + + // Create a workload that will copy tensor data from the inputs, which can have a number of different formats, + // to Neon tensors. 
+ switch (descriptor.m_Inputs[0]->GetType()) + { + case ITensorHandle::Cpu: + return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info); +#if ARMCOMPUTECL_ENABLED + case ITensorHandle::CL: + { + return MakeWorkload<CopyFromClToNeonFloat32Workload, CopyFromClToNeonUint8Workload>(descriptor, info); + } +#endif + default: + throw InvalidArgumentException("NeonWorkloadFactory: Destination type not supported for MemCopy Workload."); + } +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonL2NormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonConstantFloat32Workload, NeonConstantUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonReshapeFloat32Workload, NeonReshapeUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonFloorFloat32Workload, NullWorkload>(descriptor, info); +} + +#else // Compiled without ArmCompute libs + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +#endif + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloadFactory.hpp b/src/armnn/backends/NeonWorkloadFactory.hpp new file mode 100644 index 0000000000..0e39cfe8b1 --- /dev/null +++ b/src/armnn/backends/NeonWorkloadFactory.hpp @@ -0,0 +1,100 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include "WorkloadFactory.hpp" +#include "OutputHandler.hpp" + +#include <boost/core/ignore_unused.hpp> + +namespace armnn +{ + +// Neon workload factory +class NeonWorkloadFactory : public IWorkloadFactory +{ +public: + virtual ~NeonWorkloadFactory() { }; + + virtual Compute GetCompute() const override { return Compute::CpuAcc; } + + static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateL2Normalization(const 
L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloadUtils.cpp b/src/armnn/backends/NeonWorkloadUtils.cpp new file mode 100644 index 0000000000..0a108a8d38 --- /dev/null +++ b/src/armnn/backends/NeonWorkloadUtils.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "NeonWorkloadUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonTensorHandle.hpp" + +#include "armnn/Utils.hpp" +#include "armnn/Exceptions.hpp" + +#include "Layers.hpp" + +#include <cstring> +#include <boost/assert.hpp> +#include <boost/cast.hpp> +#include <boost/format.hpp> + +#include "Profiling.hpp" + +#include "NeonLayerSupport.hpp" +#include "../../../include/armnn/Types.hpp" + +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +// Allocate a tensor and copy the contents in data to the tensor contents +template<typename T> +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data) +{ + InitialiseArmComputeTensorEmpty(tensor); + CopyArmComputeITensorData(data, tensor); +} + +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const float* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const uint8_t* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const int32_t* data); + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloadUtils.hpp b/src/armnn/backends/NeonWorkloadUtils.hpp new file mode 100644 index 0000000000..ec7688237a --- /dev/null +++ b/src/armnn/backends/NeonWorkloadUtils.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Workload.hpp" + +#include "backends/NeonTensorHandle.hpp" + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/NEON/NEFunctions.h" +#include <arm_compute/runtime/SubTensor.h> + +#include <boost/cast.hpp> + +namespace armnn +{ +class Layer; + +template<typename T> +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data); + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads.hpp b/src/armnn/backends/NeonWorkloads.hpp new file mode 100644 index 0000000000..7e9e885adc --- /dev/null +++ b/src/armnn/backends/NeonWorkloads.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once +#include "backends/NeonWorkloads/NeonActivationFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonActivationUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonBaseConstantWorkload.hpp" +#include "backends/NeonWorkloads/NeonBaseMergerWorkload.hpp" +#include "backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp" +#include "backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonConstantFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonConstantUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonFloorFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonMergerFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonMergerUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonPermuteWorkload.hpp" +#include "backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp" +#include "backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonReshapeUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp" +#include "backends/NeonWorkloads/NeonSplitterUint8Workload.hpp" diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp new file mode 100644 index 0000000000..39e55d5761 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonActivationFloat32Workload.hpp" +#include "backends/ArmComputeUtils.hpp" + + +namespace armnn +{ +NeonActivationFloat32Workload::NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<ActivationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonActivationFloat32Workload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void NeonActivationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationFloat32Workload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp new file mode 100644 index 0000000000..6fa83ea2f6 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ +class NeonActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor> +{ +public: + NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; +} //namespace armnn + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp new file mode 100644 index 0000000000..27c37e9425 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonActivationUint8Workload.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +namespace armnn +{ +NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ActivationQueueDescriptor>(descriptor, info) +{ + + std::string reasonIfUnsupported; + if (!IsNeonActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters)) + { + throw InvalidArgumentException(reasonIfUnsupported); + } + + // Only BoundedReLu is supported (see IsNeonActivationUint8Supported) + arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<NeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<NeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void NeonActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp new file mode 100644 index 0000000000..af655db3d6 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> +{ +public: + NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp new file mode 100644 index 0000000000..d1fb64093d --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonAdditionFloat32Workload.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +NeonAdditionFloat32Workload::NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<AdditionQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonAdditionFloat32Workload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonAdditionFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonAdditionFloat32Workload_Execute"); + m_AddLayer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp new file mode 100644 index 0000000000..5b75b502a3 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ +class NeonAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor> +{ +public: + NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticAddition m_AddLayer; +}; + +} //namespace armnn + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp new file mode 100644 index 0000000000..247ebfc5dd --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp @@ -0,0 +1,72 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/NeonTensorHandle.hpp> +#include <backends/Workload.hpp> + +#include <boost/cast.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types +template <armnn::DataType DataFormat> +class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormat> +{ +public: + NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload<ConstantQueueDescriptor, DataFormat>(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override + { + using namespace armcomputetensorutils; + + // The intermediate tensor held by the corresponding layer output handler can be initialised with the + // given data on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer + // may not have been configured at the time. 
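+        // Note: m_RanOnce is declared mutable so that this deferred, one-off copy can be made from inside the
+        // const Execute() method; once the copy has happened, subsequent executions return without touching
+        // the tensor again.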
+ if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::ITensor& output = + boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor(); + + switch (DataFormat) + { + case DataType::Float32: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<float>(), output); + break; + } + case DataType::QuantisedAsymm8: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } + } + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp new file mode 100644 index 0000000000..24640c7adb --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/Workload.hpp> + +namespace armnn +{ +// Base class template providing an implementation of the Merger layer common to all data types +template <armnn::DataType DataType> +class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType> +{ +public: + using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, merger is a no-op + } +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..769905b48b --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/Workload.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types +template <armnn::DataType DataType> +class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType> +{ +public: + using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, splitter is a no-op + } +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..f107c8137f --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp @@ -0,0 +1,45 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonBatchNormalizationFloat32Workload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonBatchNormalizationFloat32Workload::NeonBatchNormalizationFloat32Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloat32Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo()); + BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo()); + BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Layer.configure( + &input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps); + + InitialiseArmComputeTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>()); + InitialiseArmComputeTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>()); + InitialiseArmComputeTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>()); + InitialiseArmComputeTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>()); +} + +void NeonBatchNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonBatchNormalizationFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + + diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..2050d42859 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor> +{ +public: + NeonBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEBatchNormalizationLayer m_Layer; + + arm_compute::Tensor m_Mean; + arm_compute::Tensor m_Variance; + arm_compute::Tensor m_Gamma; + arm_compute::Tensor m_Beta; +}; + +} //namespace armnn + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp new file mode 100644 index 0000000000..8b203fbf3a --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonConstantFloat32Workload.hpp" + +namespace armnn +{ + +void NeonConstantFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantFloat32Workload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp new file mode 100644 index 0000000000..4ea4dfe127 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantFloat32Workload : public NeonBaseConstantWorkload<DataType::Float32> +{ +public: + using NeonBaseConstantWorkload<DataType::Float32>::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp new file mode 100644 index 0000000000..f6dfaeb7a7 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonConstantUint8Workload.hpp" + +namespace armnn +{ + +void NeonConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantUint8Workload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp new file mode 100644 index 0000000000..729bb35499 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantUint8Workload : public NeonBaseConstantWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseConstantWorkload<DataType::QuantisedAsymm8>::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..5099965a24 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp @@ -0,0 +1,88 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +#include "NeonConvolution2dBaseWorkload.hpp" + +namespace armnn +{ + +template<armnn::DataType dataType> +NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<Convolution2dQueueDescriptor, dataType>(descriptor, info) +{ + using arm_compute::NEDirectConvolutionLayer; + using namespace armcomputetensorutils; + + ValidateData(); + + // todo: check tensor shapes match + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + BuildArmComputeTensor(m_KernelTensor, m_Data.m_Weight->GetTensorInfo()); + + arm_compute::Tensor* optionalBiasTensor = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBiasTensor = &m_BiasTensor; + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + const bool preferDirectConvolution = + IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(), + m_Data.m_Parameters); + + if (preferDirectConvolution) + { + auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>(); + directConvolutionLayer->configure(&input, + &m_KernelTensor, + optionalBiasTensor, + &output, + padStrideInfo); + m_ConvolutionLayer.reset(directConvolutionLayer.release()); + } + else + { + auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(); + convolutionLayer->configure(&input, + &m_KernelTensor, + optionalBiasTensor, + &output, + padStrideInfo); + m_ConvolutionLayer.reset(convolutionLayer.release()); + } + BOOST_ASSERT(m_ConvolutionLayer); + + using Type = ResolveType<dataType>; + + InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Type>()); + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<Type>()); + } +} + +// Generate known implementations for linker +template class NeonConvolution2dBaseWorkload<DataType::Float32>; +template class NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn + + diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..37740511ba --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/NeonLayerSupport.hpp"
+
+namespace armnn
+{
+
+template<armnn::DataType dataType>
+class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataType>
+{
+public:
+    using TypedWorkload<Convolution2dQueueDescriptor, dataType>::m_Data;
+
+    NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    virtual void ValidateData() const {};
+
+protected:
+    std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
+    arm_compute::Tensor m_KernelTensor;
+    arm_compute::Tensor m_BiasTensor;
+};
+} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..b4650ac011 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonConvolution2dFloat32Workload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonConvolution2dBaseWorkload(descriptor, info) +{} + + +void NeonConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dFloat32Workload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dFloat32Workload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dFloat32Workload", 1, 1); +} + + + +} //namespace armnn + + diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..f4d95d623f --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> +#include "NeonConvolution2dBaseWorkload.hpp" + +namespace armnn +{ +class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload<DataType::Float32> +{ +public: + NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + void ValidateData() const override; +}; +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp new file mode 100644 index 0000000000..11e31c727a --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonDepthwiseConvolutionFloat32Workload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + std::string reasonIfUnsupported; + if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + BuildArmComputeTensor(m_KernelTensor, weightInfo); + + arm_compute::Tensor* optionalBias = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBias = &m_BiasTensor; + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloat32Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>()); + + if (optionalBias) + { + InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>()); + } +} + +void NeonDepthwiseConvolutionFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "NeonDepthwiseConvolutionFloat32Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp new file mode 100644 index 0000000000..f9e295f568 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + NeonDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + arm_compute::Tensor m_KernelTensor; + arm_compute::Tensor m_BiasTensor; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..bd034c4f80 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + std::string reasonIfUnsupported; + if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + BuildArmComputeTensor(m_KernelTensor, weightInfo); + + arm_compute::Tensor* optionalBias = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBias = &m_BiasTensor; + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + &m_KernelTensor, + optionalBias, + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); + + if (optionalBias) + { + InitialiseArmComputeTensorData(*optionalBias, 
m_Data.m_Bias->GetConstTensor<int32_t>());
+    }
+}
+
+void NeonDepthwiseConvolutionUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonDepthwiseConvolutionUint8Workload_Execute");
+    BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+    m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
new file mode 100644
index 0000000000..9cf272e9f5
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+    NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+    arm_compute::Tensor m_KernelTensor;
+    arm_compute::Tensor m_BiasTensor;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
new file mode 100644
index 0000000000..a5eec5cadb
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonFloorFloat32Workload.hpp"
+
+namespace armnn
+{
+NeonFloorFloat32Workload::NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : Float32Workload<FloorQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonFloorFloat32Workload", 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output);
+}
+
+void NeonFloorFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFloorFloat32Workload_Execute");
+    m_Layer.run();
+}
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
new file mode 100644
index 0000000000..f876f1e1bb
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor> +{ +public: + NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp new file mode 100644 index 0000000000..54c4e4333c --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonFullyConnectedFloat32Workload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonFullyConnectedFloat32Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + arm_compute::Tensor* optionalBiasTensor = nullptr; + if (m_Data.m_Parameters.m_BiasEnabled) + { + BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + optionalBiasTensor = &m_BiasesTensor; + } + + // Construct + m_FullyConnectedLayer.configure( + &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix); + + // Allocate + InitialiseArmComputeTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>()); + + if (optionalBiasTensor) + { + InitialiseArmComputeTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>()); + } +} + +void NeonFullyConnectedFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFullyConnectedFloat32Workload_Execute"); + m_FullyConnectedLayer.run(); +} + +} //namespace armnn + + diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp new file mode 100644 index 0000000000..f9230f1d93 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor> +{ +public: + NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; + arm_compute::Tensor m_WeightsTensor; + arm_compute::Tensor m_BiasesTensor; +}; + +} //namespace armnn + + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..085f58a219 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonL2NormalizationFloat32Workload.hpp" +#include "backends/ArmComputeUtils.hpp" + + +namespace armnn +{ + +NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonL2NormalizationFloat32Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void NeonL2NormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonL2NormalizationFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..6cab28366a --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ +class NeonL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor> +{ +public: + NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + // Purposely not a NEL2Normalize function. See constructor. + mutable arm_compute::NENormalizationLayer m_Layer; +}; +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp new file mode 100644 index 0000000000..7520e8768e --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+//
+
+#include "NeonMergerFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void NeonMergerFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMergerFloat32Workload_Execute");
+    NeonBaseMergerWorkload::Execute();
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
new file mode 100644
index 0000000000..5c889c2af0
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonMergerFloat32Workload : public NeonBaseMergerWorkload<DataType::Float32>
+{
+public:
+    using NeonBaseMergerWorkload<DataType::Float32>::NeonBaseMergerWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
new file mode 100644
index 0000000000..51578e5bff
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonMergerUint8Workload.hpp"
+
+namespace armnn
+{
+
+void NeonMergerUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMergerUint8Workload_Execute");
+    NeonBaseMergerWorkload::Execute();
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
new file mode 100644
index 0000000000..fd1e6b72b9
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonMergerUint8Workload : public NeonBaseMergerWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using NeonBaseMergerWorkload<DataType::QuantisedAsymm8>::NeonBaseMergerWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
new file mode 100644
index 0000000000..58ce7b74ba
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+// + +#include "NeonMultiplicationFloat32Workload.hpp" + + +namespace armnn +{ + +NeonMultiplicationFloat32Workload::NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonMultiplicationFloat32Workload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. + m_PixelWiseMultiplication.configure(&input1, + &input2, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +void NeonMultiplicationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMultiplicationFloat32Workload_Execute"); + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn + + diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp new file mode 100644 index 0000000000..ed5ead3700 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor> +{ +public: + NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..739390d5a1 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonNormalizationFloat32Workload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" + +namespace armnn +{ + +NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<NormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonNormalizationFloat32Workload", 1, 1); + std::string reasonIfUnsupported; + if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + // input and output tensors have to have the same dimensionality + if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1] + || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0] + || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3] + || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2]) + { + throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality."); + } + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const arm_compute::NormType normType = + ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType); + arm_compute::NormalizationLayerInfo normalizationInfo(normType, + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K, + false); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +} + +void NeonNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonNormalizationFloat32Workload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..12a0fa80b2 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor> +{ +public: + NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NENormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp new file mode 100644 index 0000000000..e0a0457422 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonPermuteWorkload.hpp" +#include "backends/NeonTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include <arm_compute/core/Error.h> + +namespace armnn +{ + +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + const armnn::PermutationVector& mappings = descriptor.m_DimMappings; + + return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo, + armcomputetensorutils::BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType DataType> +NeonPermuteWorkload<DataType>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Run the layer + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType DataType> +void NeonPermuteWorkload<DataType>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class NeonPermuteWorkload<DataType::Float32>; +template class NeonPermuteWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp new file mode 100644 index 0000000000..56e8719d6c --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include <armnn/TypesUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEPermute.h> + +#include <string> + +namespace armnn +{ +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const PermuteDescriptor& descriptor); + +template <armnn::DataType DataType> +class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType> +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("NeonPermute") + GetDataTypeName(DataType) + "Workload"; + return name; + } + + NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data; + mutable arm_compute::NEPermute m_PermuteFunction; +}; + +using NeonPermuteFloat32Workload = NeonPermuteWorkload<DataType::Float32>; +using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..6d6a492155 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonPooling2dBaseWorkload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template <armnn::DataType dataType> +NeonPooling2dBaseWorkload<dataType>::NeonPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class NeonPooling2dBaseWorkload<DataType::Float32>; +template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..9461982f86 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types +template <armnn::DataType dataType> +class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType> +{ +public: + using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data; + + NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::NEPoolingLayer m_PoolingLayer; +}; + + +} //namespace armnn + + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp new file mode 100644 index 0000000000..ba2aa20924 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonPooling2dFloat32Workload.hpp" + + + +namespace armnn +{ + +NeonPooling2dFloat32Workload::NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload<armnn::DataType::Float32>(descriptor, info, "NeonPooling2dFloat32Workload") +{ +} + +void NeonPooling2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dFloat32Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp new file mode 100644 index 0000000000..6cfc9cc96f --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dFloat32Workload : public NeonPooling2dBaseWorkload<armnn::DataType::Float32> +{ +public: + NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..0778794081 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonPooling2dUint8Workload.hpp" + + + +namespace armnn +{ + +NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8>(descriptor, info, "NeonPooling2dUint8Workload") +{ +} + +void NeonPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..fa8182125b --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <armnn/Types.hpp> +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8> +{ +public: + NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp new file mode 100644 index 0000000000..317d16f6bd --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonReshapeFloat32Workload.hpp" + + + +namespace armnn +{ + +NeonReshapeFloat32Workload::NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeFloat32Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeFloat32Workload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp new file mode 100644 index 0000000000..27f4aea9e7 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
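// Supporting sketch (not part of the patch): a reshape only changes shape metadata, so the two
// shapes handed to NEReshapeLayer::configure must describe the same number of elements. The
// helper below is a hypothetical standalone check with example shapes.
#include <cstddef>
#include <initializer_list>

std::size_t NumElements(std::initializer_list<std::size_t> shape)
{
    std::size_t count = 1;
    for (std::size_t dim : shape)
    {
        count *= dim;
    }
    return count;
}

bool IsValidReshape(std::initializer_list<std::size_t> inShape, std::initializer_list<std::size_t> outShape)
{
    return NumElements(inShape) == NumElements(outShape); // e.g. {1, 3, 4, 4} -> {1, 48} is valid
}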
+// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor> +{ +public: + NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp new file mode 100644 index 0000000000..06f57c1e0f --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonReshapeUint8Workload.hpp" + + + + +namespace armnn +{ +NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeUint8Workload_Execute"); + m_Layer.run(); +} +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp new file mode 100644 index 0000000000..66b7d914b1 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> +{ +public: + NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp new file mode 100644 index 0000000000..229562ece2 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonSoftmaxFloat32Workload.hpp" + +namespace armnn +{ +NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxFloat32Workload", 1, 1); + + // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void NeonSoftmaxFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSoftmaxFloat32Workload_Execute"); + m_SoftmaxLayer.run(); +} +} //namespace armnn + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp new file mode 100644 index 0000000000..c466a0f9c6 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor> +{ +public: + NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..a66b0343ff --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonSoftmaxUint8Workload.hpp" + + + +namespace armnn +{ +NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. 
Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void NeonSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..bccd82a850 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <backends/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> +{ +public: + NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + + + + diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp new file mode 100644 index 0000000000..13701d2ed3 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonSplitterFloat32Workload.hpp" + +namespace armnn +{ + +void NeonSplitterFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterFloat32Workload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp new file mode 100644 index 0000000000..432f5de4eb --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterFloat32Workload : public NeonBaseSplitterWorkload<DataType::Float32> +{ +public: + using NeonBaseSplitterWorkload<DataType::Float32>::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp new file mode 100644 index 0000000000..90d24d3ffd --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
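// Supporting sketch (not part of the patch): why the uint8 softmax workload above requires
// scale = 1/256 and offset = 0. Softmax outputs lie in [0, 1], and with that scheme a quantised
// value q simply means q / 256 (an exact 1.0 saturates to 255). Values are illustrative.
#include <algorithm>
#include <cmath>
#include <cstdint>

uint8_t QuantizeSoftmaxOutput(float probability)
{
    const float   scale  = 1.0f / 256.0f; // required output scale
    const int32_t offset = 0;             // required output offset
    const int32_t q = static_cast<int32_t>(std::round(probability / scale)) + offset;
    return static_cast<uint8_t>(std::min<int32_t>(std::max<int32_t>(q, 0), 255));
}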
+// + +#include "NeonSplitterUint8Workload.hpp" + +namespace armnn +{ + +void NeonSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterUint8Workload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp new file mode 100644 index 0000000000..1c97c74e02 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/OutputHandler.cpp b/src/armnn/backends/OutputHandler.cpp new file mode 100644 index 0000000000..54afe565a9 --- /dev/null +++ b/src/armnn/backends/OutputHandler.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "OutputHandler.hpp" + +#include <boost/assert.hpp> +#include <boost/log/trivial.hpp> + +#include "backends/WorkloadFactory.hpp" +#include "backends/WorkloadDataCollector.hpp" +#include "backends/ITensorHandle.hpp" + +namespace armnn +{ + +void OutputHandler::SetTensorInfo(const TensorInfo& tensorInfo) +{ + m_TensorInfo = tensorInfo; + m_bTensorInfoSet = true; +} + +void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory) +{ + m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); +} + +void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const +{ + dataCollector.Push(m_TensorHandle.get(), m_TensorInfo); +} + +void OutputHandler::AllocateTensors() +{ + if (m_TensorHandle) + { + m_TensorHandle->Allocate(); + } +} + +} // namespace armnn diff --git a/src/armnn/backends/OutputHandler.hpp b/src/armnn/backends/OutputHandler.hpp new file mode 100644 index 0000000000..9cc87c6095 --- /dev/null +++ b/src/armnn/backends/OutputHandler.hpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "backends/WorkloadDataFwd.hpp" + +#include <string> +#include <vector> + +#include <memory> +#include <set> + +#include <boost/assert.hpp> + +#include "armnn/INetwork.hpp" +#include "armnn/Types.hpp" +#include "armnn/Descriptors.hpp" +#include "armnn/Tensor.hpp" +#include "ITensorHandle.hpp" + +namespace armnn +{ + +class ITensorHandle; +class IWorkloadFactory; +class OutputSlot; +class WorkloadDataCollector; + +class OutputHandler +{ +public: + /// @brief Sets the TensorInfo used by this output handler. + /// @param tensorInfo TensorInfo for the output. + void SetTensorInfo(const TensorInfo& tensorInfo); + + /// @brief Create tensor handlers used by the intermediate tensors. Does not allocate memory. + /// @param factory Factory to be used for handler creation. + void CreateTensorHandles(const IWorkloadFactory& factory); + + /// @brief Get the matching TensorInfo for the output + /// @return Reference to the output TensorInfo. 
+ const TensorInfo& GetTensorInfo() const { return m_TensorInfo; } + + /// @brief Get the allocated tensor memory. + /// @return Pointer to the tensor memory + ITensorHandle* GetData() const { return m_TensorHandle.get(); } + + /// Fill the outputs for a given queue descriptor + void CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const; + + void SetData(std::unique_ptr<ITensorHandle> data) { m_TensorHandle = std::move(data); } + + /// @brief Allocate memory for all the tensors assigned to the handlers + void AllocateTensors(); + + /// @brief Returns true if SetTensorInfo() has been called at least once on this. + bool IsTensorInfoSet() const { return m_bTensorInfoSet; } +private: + std::unique_ptr<ITensorHandle> m_TensorHandle; + TensorInfo m_TensorInfo; + bool m_bTensorInfoSet = false; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefLayerSupport.cpp b/src/armnn/backends/RefLayerSupport.cpp new file mode 100644 index 0000000000..964c18e8ea --- /dev/null +++ b/src/armnn/backends/RefLayerSupport.cpp @@ -0,0 +1,262 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "LayerSupportCommon.hpp" +#include "RefLayerSupport.hpp" +#include <armnn/Descriptors.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +#include <boost/core/ignore_unused.hpp> + +#include "InternalTypes.hpp" + +using namespace boost; + +namespace armnn +{ + +template<typename Float32Func, typename Uint8Func, typename ... Params> +bool IsSupportedForDataTypeRef(std::string* reasonIfUnsupported, + DataType dataType, + Float32Func floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... params) +{ + return IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + uint8FuncPtr, + std::forward<Params>(params)...); +} + +bool IsActivationSupportedRef(const TensorInfo& input, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsAdditionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsBatchNormalizationSupportedRef(const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConstantSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConvolution2dSupportedRef(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + ignore_unused(weights); + return 
IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFullyConnectedSupportedRef(const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsInputSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMergerSupportedRef(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsMultiplicationSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsOutputSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPooling2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsResizeBilinearSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSoftmaxSupportedRef(const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSplitterSupportedRef(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedRef(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + 
&TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsReshapeSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFloorSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +} diff --git a/src/armnn/backends/RefLayerSupport.hpp b/src/armnn/backends/RefLayerSupport.hpp new file mode 100644 index 0000000000..4a329aef34 --- /dev/null +++ b/src/armnn/backends/RefLayerSupport.hpp @@ -0,0 +1,98 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +bool IsActivationSupportedRef(const TensorInfo& input, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedRef(const TensorInfo& input, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedRef(const TensorInfo& input, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedRef(const TensorInfo& input, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedRef(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedRef(const TensorInfo& input, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedRef(const TensorInfo& input, + const 
ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedRef(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/armnn/backends/RefWorkloadFactory.cpp b/src/armnn/backends/RefWorkloadFactory.cpp new file mode 100644 index 0000000000..46502d8142 --- /dev/null +++ b/src/armnn/backends/RefWorkloadFactory.cpp @@ -0,0 +1,231 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "CpuTensorHandle.hpp" +#include "RefWorkloadFactory.hpp" +#include "RefWorkloads.hpp" +#include "Layer.hpp" +#include "Layers.hpp" +#include "MemCopyWorkload.hpp" +#include "MakeWorkloadHelper.hpp" + +#include <boost/log/trivial.hpp> + +namespace armnn +{ + +template <typename F32Workload, typename U8Workload, typename QueueDescriptorType> +std::unique_ptr<IWorkload> RefWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, + const WorkloadInfo& info) const +{ + if (!IsOperationQueueDescriptor(descriptor) || m_OperationWorkloadsAllowed) + { + return armnn::MakeWorkload<F32Workload, U8Workload>(descriptor, info); + } + else + { + return std::unique_ptr<IWorkload>(); + } +} + +RefWorkloadFactory::RefWorkloadFactory(bool operationWorkloadsAllowed) + : m_OperationWorkloadsAllowed(operationWorkloadsAllowed) +{ +} + +bool RefWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::CpuRef, layer, dataType, outReasonIfUnsupported); +} + +std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return std::make_unique<ScopedCpuTensorHandle>(tensorInfo); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (info.m_InputTensorInfos.empty() ) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length"); + } + if (info.m_OutputTensorInfos.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length"); + } + + if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: data input and output differ in byte count."); + } + + return MakeWorkload<CopyFromCpuToCpuFloat32Workload, CopyFromCpuToCpuUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (info.m_InputTensorInfos.empty() ) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length"); + } + if (info.m_OutputTensorInfos.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length"); + } + if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output differ in byte count."); + } + + return MakeWorkload<CopyFromCpuToCpuFloat32Workload, 
CopyFromCpuToCpuUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefActivationFloat32Workload, RefActivationUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefSoftmaxFloat32Workload, RefSoftmaxUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefSplitterFloat32Workload, RefSplitterUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefMergerFloat32Workload, RefMergerUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<RefFullyConnectedFloat32Workload, RefFullyConnectedUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefPermuteFloat32Workload, RefPermuteUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefPooling2dFloat32Workload, RefPooling2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateConvolution2d( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<RefConvolution2dFloat32Workload, RefConvolution2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<RefDepthwiseConvolution2dFloat32Workload, + RefDepthwiseConvolution2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateNormalization( + const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<RefNormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefAdditionFloat32Workload, RefAdditionUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<RefMultiplicationFloat32Workload, RefMultiplicationUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<RefBatchNormalizationFloat32Workload, RefBatchNormalizationUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& 
descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor."); + } + // Create a workload that will copy tensor data from the inputs, which can have a number of different formats, + // to CPU tensors. + switch (descriptor.m_Inputs[0]->GetType()) + { +#if ARMCOMPUTECL_ENABLED + case ITensorHandle::CL: + { + return MakeWorkload<CopyFromClToCpuFloat32Workload, CopyFromClToCpuUint8Workload>(descriptor, info); + } +#endif +#if ARMCOMPUTENEON_ENABLED + case ITensorHandle::Neon: + { + return MakeWorkload<CopyFromNeonToCpuFloat32Workload, CopyFromNeonToCpuUint8Workload>(descriptor, info); + } +#endif + default: + throw InvalidArgumentException("RefWorkloadFactory: Destination type not supported for MemCopy Workload."); + return nullptr; + } +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefResizeBilinearFloat32Workload, RefResizeBilinearUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefFakeQuantizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefL2NormalizationFloat32Workload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefConstantFloat32Workload, RefConstantUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefReshapeFloat32Workload, RefReshapeUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<RefFloorFloat32Workload, NullWorkload>(descriptor, info); +} + +} // namespace armnn diff --git a/src/armnn/backends/RefWorkloadFactory.hpp b/src/armnn/backends/RefWorkloadFactory.hpp new file mode 100644 index 0000000000..3fab490ad8 --- /dev/null +++ b/src/armnn/backends/RefWorkloadFactory.hpp @@ -0,0 +1,124 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
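// Supporting sketch (not part of the patch): the shape of the MakeWorkload<F32Workload, U8Workload>
// dispatch the factory above relies on, reduced to a standalone template. The DataType enum, the
// IWorkload base and the two addition workloads here are stand-ins, not the armnn definitions.
#include <memory>

enum class DataType { Float32, QuantisedAsymm8 };

struct IWorkload { virtual ~IWorkload() = default; };
struct RefAdditionF32 : IWorkload { explicit RefAdditionF32(int /*descriptor*/) {} };
struct RefAdditionU8  : IWorkload { explicit RefAdditionU8(int /*descriptor*/) {} };

template <typename F32Workload, typename U8Workload, typename Descriptor>
std::unique_ptr<IWorkload> MakeWorkloadSketch(const Descriptor& descriptor, DataType firstInputType)
{
    // The real helper reads the type from WorkloadInfo; here the caller passes it directly.
    switch (firstInputType)
    {
        case DataType::Float32:         return std::make_unique<F32Workload>(descriptor);
        case DataType::QuantisedAsymm8: return std::make_unique<U8Workload>(descriptor);
    }
    return nullptr;
}

int main()
{
    auto workload = MakeWorkloadSketch<RefAdditionF32, RefAdditionU8>(0, DataType::Float32);
    return workload ? 0 : 1;
}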
+// +#pragma once + +#include "WorkloadFactory.hpp" +#include "OutputHandler.hpp" + +#include <boost/core/ignore_unused.hpp> + +namespace armnn +{ + +template <typename QueueDescriptorType> +constexpr bool IsOperationQueueDescriptor(const QueueDescriptorType&) { return true; } + +template <> +constexpr bool IsOperationQueueDescriptor(const MemCopyQueueDescriptor&) { return false; } + +template <> +constexpr bool IsOperationQueueDescriptor(const ConstantQueueDescriptor&) { return false; } + +template <> +constexpr bool IsOperationQueueDescriptor(const PermuteQueueDescriptor&) { return false; } + +// Reference workload factory +class RefWorkloadFactory : public IWorkloadFactory +{ +public: + explicit RefWorkloadFactory(bool operationWorkloadsAllowed = true); + virtual ~RefWorkloadFactory() { }; + + virtual Compute GetCompute() const override { return Compute::CpuRef; } + + static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return false; } + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override + { + boost::ignore_unused(parent, subTensorShape, subTensorOrigin); + return nullptr; + }; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const 
BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + +private: + + template <typename F32Workload, typename U8Workload, typename QueueDescriptorType> + std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const; + + const bool m_OperationWorkloadsAllowed; +}; + +} // namespace armnn diff --git a/src/armnn/backends/RefWorkloads.hpp b/src/armnn/backends/RefWorkloads.hpp new file mode 100644 index 0000000000..ed4fa840da --- /dev/null +++ b/src/armnn/backends/RefWorkloads.hpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/RefWorkloads/RefConstantUint8Workload.hpp" +#include "backends/RefWorkloads/Addition.hpp" +#include "backends/RefWorkloads/ConvImpl.hpp" +#include "backends/RefWorkloads/RefMultiplicationUint8Workload.hpp" +#include "backends/RefWorkloads/RefBaseConstantWorkload.hpp" +#include "backends/RefWorkloads/RefConvolution2dUint8Workload.hpp" +#include "backends/RefWorkloads/RefSplitterUint8Workload.hpp" +#include "backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp" +#include "backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp" +#include "backends/RefWorkloads/Multiplication.hpp" +#include "backends/RefWorkloads/RefActivationUint8Workload.hpp" +#include "backends/RefWorkloads/RefPooling2dFloat32Workload.hpp" +#include "backends/RefWorkloads/RefWorkloadUtils.hpp" +#include "backends/RefWorkloads/RefMergerUint8Workload.hpp" +#include "backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp" +#include "backends/RefWorkloads/Softmax.hpp" +#include "backends/RefWorkloads/RefMergerFloat32Workload.hpp" +#include "backends/RefWorkloads/TensorBufferArrayView.hpp" +#include "backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp" +#include "backends/RefWorkloads/Splitter.hpp" +#include "backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp" +#include "backends/RefWorkloads/RefReshapeFloat32Workload.hpp" +#include "backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp" +#include "backends/RefWorkloads/FullyConnected.hpp" +#include "backends/RefWorkloads/RefFloorFloat32Workload.hpp" +#include "backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp" +#include "backends/RefWorkloads/RefSoftmaxUint8Workload.hpp" +#include "backends/RefWorkloads/RefReshapeUint8Workload.hpp" +#include 
"backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp" +#include "backends/RefWorkloads/RefAdditionUint8Workload.hpp" +#include "backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp" +#include "backends/RefWorkloads/ResizeBilinear.hpp" +#include "backends/RefWorkloads/RefNormalizationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp" +#include "backends/RefWorkloads/RefPooling2dUint8Workload.hpp" +#include "backends/RefWorkloads/BatchNormImpl.hpp" +#include "backends/RefWorkloads/Activation.hpp" +#include "backends/RefWorkloads/Merger.hpp" +#include "backends/RefWorkloads/RefSplitterFloat32Workload.hpp" +#include "backends/RefWorkloads/RefConstantFloat32Workload.hpp" +#include "backends/RefWorkloads/RefActivationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp" +#include "backends/RefWorkloads/RefAdditionFloat32Workload.hpp" +#include "backends/RefWorkloads/Pooling2d.hpp" +#include "backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefPermuteWorkload.hpp" diff --git a/src/armnn/backends/RefWorkloads/Activation.cpp b/src/armnn/backends/RefWorkloads/Activation.cpp new file mode 100644 index 0000000000..ede283cbf9 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Activation.cpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "Activation.hpp" + +#include <boost/log/trivial.hpp> + +#include <cmath> + +namespace armnn +{ + +void Activation(const float* in, + float* out, + const TensorInfo& tensorInfo, + ActivationFunction function, + float a, + float b) +{ + for (size_t i = 0; i<tensorInfo.GetNumElements(); i++) + { + float input = in[i]; + float output; + + // compute the result of the activation function + switch (function) + { + case ActivationFunction::Linear: + { + output = a * input + b; + break; + } + case ActivationFunction::Sigmoid: + { + output = 1.f / (1.f + expf(-input)); + break; + } + case ActivationFunction::ReLu: + { + output = std::max(0.f, input); + break; + } + case ActivationFunction::BoundedReLu: + { + output = std::min(a, std::max(b, input)); + break; + } + case ActivationFunction::SoftReLu: + { + output = logf(1.0f + expf(input)); + break; + } + case ActivationFunction::LeakyReLu: + { + output = input > 0.0f ? input : (input * a); + break; + } + case ActivationFunction::Abs: + { + output = input < 0 ? -input : input; + break; + } + case ActivationFunction::Sqrt: + { + output = sqrtf(input); + break; + } + case ActivationFunction::Square: + { + output = input * input; + break; + } + case ActivationFunction::TanH: + { + output = a * tanhf(b * input); + break; + } + default: + { + BOOST_LOG_TRIVIAL(error) << "Unsupported activation function"; + return; + } + } + + out[i] = output; + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Activation.hpp b/src/armnn/backends/RefWorkloads/Activation.hpp new file mode 100644 index 0000000000..874441c862 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Activation.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include <armnn/Tensor.hpp> +#include <armnn/Types.hpp> + +namespace armnn +{ + +/// Performs the ActivationFunction elementwise on the inputs to give the outputs +void Activation(const float* in, + float* out, + const TensorInfo& tensorInfo, + ActivationFunction function, + float a, + float b); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Addition.cpp b/src/armnn/backends/RefWorkloads/Addition.cpp new file mode 100644 index 0000000000..c26f82ecc2 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Addition.cpp @@ -0,0 +1,44 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "Addition.hpp" +#include "Broadcast.hpp" + +#include <functional> + +namespace armnn +{ + +namespace +{ + +void ElementwiseAddition(unsigned int numElements, const float* inData0, const float* inData1, float* outData) +{ + for (unsigned int i = 0; i < numElements; ++i) + { + outData[i] = inData0[i] + inData1[i]; + } +} + +} // namespace + +void Addition(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData) +{ + if (inShape0 == inShape1) + { + ElementwiseAddition(inShape0.GetNumElements(), inData0, inData1, outData); + } + else + { + BroadcastLoop(inShape0, inShape1, outShape).Unroll(std::plus<float>(), 0, inData0, inData1, outData); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Addition.hpp b/src/armnn/backends/RefWorkloads/Addition.hpp new file mode 100644 index 0000000000..e62d63ec14 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Addition.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +void Addition(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp b/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp new file mode 100644 index 0000000000..f40a277d17 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/BatchNormImpl.hpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
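// Illustrative usage sketch (not part of the patch): exercising the reference Addition above with
// one broadcast operand. The shapes, values and the TensorShape(numDims, dims) constructor are
// assumptions for the example; in-tree include paths are used.
#include "armnn/Tensor.hpp"
#include "backends/RefWorkloads/Addition.hpp"

void AdditionBroadcastExample()
{
    using namespace armnn;

    const unsigned int dims0[] = { 1, 1, 2, 3 };
    const unsigned int dims1[] = { 1, 1, 1, 3 };

    const TensorShape inShape0(4, dims0);
    const TensorShape inShape1(4, dims1);
    const TensorShape outShape(4, dims0); // the broadcast result takes the larger shape

    const float in0[] = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f };
    const float in1[] = { 10.f, 20.f, 30.f };            // repeated along the size-2 dimension
    float out[6] = {};

    Addition(inShape0, inShape1, outShape, in0, in1, out);
    // out is { 10, 21, 32, 13, 24, 35 }.
}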
+// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include <armnn/Tensor.hpp> + +#include <cmath> + +namespace armnn +{ + +template<typename NormData> +static void BatchNormImpl(NormData data, + const float* varIn, + const float* meanIn, + const float* gammaIn, + const float* betaIn, + float * outputData, + const float * inputData) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++) + { + float var = varIn[c]; + float mean = meanIn[c]; + float gamma = gammaIn[c]; + float beta = betaIn[c]; + + float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps); + float add = beta - mult * mean; + + for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++) + { + for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++) + { + for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++) + { + unsigned int index = i + + j*inputInfo0.GetShape()[3] + + c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + + n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + * inputInfo0.GetShape()[1]; + + outputData[index] = mult * inputData[index] + add; + } + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Broadcast.cpp b/src/armnn/backends/RefWorkloads/Broadcast.cpp new file mode 100644 index 0000000000..90ccb48616 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Broadcast.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "Broadcast.hpp" + +namespace armnn +{ + +BroadcastLoop::BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape) +: m_DimData(outShape.GetNumDimensions()) +{ + const unsigned int numDims = GetNumDimensions(); + + unsigned int sIn0 = 1; + unsigned int sIn1 = 1; + unsigned int sOut = 1; + + for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--) + { + m_DimData[j].m_DimSize = outShape[j]; + m_DimData[j].m_Stride1 = (inShape0[j] > 1) ? sIn0 : 0; + m_DimData[j].m_Stride2 = (inShape1[j] > 1) ? sIn1 : 0; + m_DimData[j].m_StrideOut = sOut; + + sIn0 *= inShape0[j]; + sIn1 *= inShape1[j]; + sOut *= outShape[j]; + } +} + +} // namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Broadcast.hpp b/src/armnn/backends/RefWorkloads/Broadcast.hpp new file mode 100644 index 0000000000..b65b57f7a1 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Broadcast.hpp @@ -0,0 +1,58 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
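// Supporting sketch (not part of the patch): the per-channel arithmetic BatchNormImpl above folds
// into one multiply and one add per element. All numbers are illustrative.
#include <cmath>
#include <cstdio>

int main()
{
    const float var = 4.0f, mean = 2.0f, gamma = 0.5f, beta = 1.0f, eps = 0.001f;

    // One scale and one shift per channel...
    const float mult = gamma / std::sqrt(var + eps);
    const float add  = beta - mult * mean;

    // ...applied to every element of that channel: gamma * (x - mean) / sqrt(var + eps) + beta.
    const float input  = 6.0f;
    const float output = mult * input + add;
    std::printf("%f\n", output); // approximately 0.5 * (6 - 2) / 2 + 1 = 2
    return 0;
}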
+// + +#include <armnn/Tensor.hpp> + +#include <functional> + +namespace armnn +{ + +struct BroadcastLoop +{ + BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape); + + unsigned int GetNumDimensions() + { + return static_cast<unsigned int>(m_DimData.size()); + } + + template <typename T0, typename T1, typename U, typename Func> + void Unroll(Func operationFunc, + unsigned int dimension, + const T0* inData0, + const T1* inData1, + U* outData) + { + if (dimension >= GetNumDimensions()) + { + *outData = operationFunc(*inData0, *inData1); + return; + } + + for (unsigned int i = 0; i < m_DimData[dimension].m_DimSize; i++) + { + Unroll(operationFunc, dimension + 1, inData0, inData1, outData); + + inData0 += m_DimData[dimension].m_Stride1; + inData1 += m_DimData[dimension].m_Stride2; + outData += m_DimData[dimension].m_StrideOut; + } + } + +private: + // Struct to hold the dimension data + struct BroadcastDimensionData + { + unsigned int m_DimSize; + unsigned int m_StrideOut; + unsigned int m_Stride1; + unsigned int m_Stride2; + }; + + std::vector<BroadcastDimensionData> m_DimData; +}; + +} //namespace armnn
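// Supporting sketch (not part of the patch): the stride trick BroadcastLoop uses above. A dimension
// of size 1 gets a stride of 0, so the same element is re-read while the other operand and the
// output advance normally. Shapes [2, 3] and [1, 3] below are example values.
#include <cstdio>

int main()
{
    const float a[6] = { 0, 1, 2, 3, 4, 5 };      // logical shape [2, 3]
    const float b[3] = { 100, 200, 300 };         // logical shape [1, 3]

    const unsigned int dimSize[2] = { 2, 3 };
    const unsigned int strideA[2] = { 3, 1 };     // full strides: advances row by row
    const unsigned int strideB[2] = { 0, 1 };     // outer stride 0: the single row is broadcast

    for (unsigned int i = 0; i < dimSize[0]; ++i)
    {
        for (unsigned int j = 0; j < dimSize[1]; ++j)
        {
            std::printf("%g ", a[i * strideA[0] + j * strideA[1]] + b[i * strideB[0] + j * strideB[1]]);
        }
    }
    std::printf("\n"); // 100 201 302 103 204 305
    return 0;
}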
\ No newline at end of file diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.cpp b/src/armnn/backends/RefWorkloads/ConvImpl.cpp new file mode 100644 index 0000000000..9ebadacddb --- /dev/null +++ b/src/armnn/backends/RefWorkloads/ConvImpl.cpp @@ -0,0 +1,71 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ConvImpl.hpp" + +#include <boost/assert.hpp> + +#include <cmath> +#include <limits> + +namespace armnn +{ + +QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier) +{ + BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f); + if (multiplier == 0.0f) + { + m_Multiplier = 0; + m_RightShift = 0; + } + else + { + const double q = std::frexp(multiplier, &m_RightShift); + m_RightShift = -m_RightShift; + int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + BOOST_ASSERT(qFixed <= (1ll << 31)); + if (qFixed == (1ll << 31)) + { + qFixed /= 2; + --m_RightShift; + } + BOOST_ASSERT(m_RightShift >= 0); + BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max()); + m_Multiplier = static_cast<int32_t>(qFixed); + } +} + +int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const +{ + int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier); + return RoundingDivideByPOT(x, m_RightShift); +} + +int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) +{ + // Check for overflow + if (a == b && a == std::numeric_limits<int32_t>::min()) + { + return std::numeric_limits<int32_t>::max(); + } + int64_t a_64(a); + int64_t b_64(b); + int64_t ab_64 = a_64 * b_64; + int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31)); + return ab_x2_high32; +} + +int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent) +{ + BOOST_ASSERT(exponent >= 0 && exponent <= 31); + int32_t mask = (1 << exponent) - 1; + int32_t remainder = x & mask; + int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0); + return (x >> exponent) + (remainder > threshold ? 1 : 0); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp new file mode 100644 index 0000000000..ecc5b14687 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/ConvImpl.hpp @@ -0,0 +1,184 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include <armnn/Tensor.hpp> + +#include <boost/assert.hpp> +#include <boost/numeric/conversion/cast.hpp> + +#include <cmath> +#include <limits> + +namespace armnn +{ + +/// Performs multiplication of a integer with a multiplier which is less than one, +/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. +struct QuantizedMultiplierSmallerThanOne +{ +public: + /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier. + /// This stores the appropriate integer quantities (derived from the given multiplier) for later use. + /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne(). 
+ QuantizedMultiplierSmallerThanOne(float multiplier); + + /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne() + int32_t operator*(int32_t rhs) const; + +private: + /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul() + static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b); + + /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT() + static int32_t RoundingDivideByPOT(int32_t x, int exponent); + + int32_t m_Multiplier; + int32_t m_RightShift; +}; + +/// an implementation shared by normal and depthwise convolution +template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType> +static void ConvImpl(ConvData data, + const InputType* inputData, + float inputScale, + int32_t inputOffset, + const InputType* filterData, + float filterScale, + int32_t filterOffset, + const BiasType* biasData, + InputType* outputData, + float outputScale, + int32_t outputOffset, + bool depthwise = false) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + const TensorInfo& filterInfo = data.m_Weight->GetTensorInfo(); + + unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; + unsigned int channelsInput = filterInfo.GetShape()[1]; + unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0]; + + BOOST_ASSERT(data.m_Parameters.m_BiasEnabled == false || biasData != nullptr); + + unsigned int batchSize = outputInfo0.GetShape()[0]; + unsigned int heightOutput = outputInfo0.GetShape()[2]; + unsigned int widthOutput = outputInfo0.GetShape()[3]; + unsigned int heightInput = inputInfo0.GetShape()[2]; + unsigned int widthInput = inputInfo0.GetShape()[3]; + + unsigned int heightFilter = filterInfo.GetShape()[2]; + unsigned int widthFilter = filterInfo.GetShape()[3]; + + unsigned int paddingTop = data.m_Parameters.m_PadTop; + unsigned int paddingLeft = data.m_Parameters.m_PadLeft; + unsigned int hStride = data.m_Parameters.m_StrideY; + unsigned int xStride = data.m_Parameters.m_StrideX; + + // the world's least efficient convolution + for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) + { + for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) + { + for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++) + { + for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) + { + // this loop goes over each output element + AccumulatorType sum = AccumulatorType(); + + // for depthwise, each output channel corresponds to exactly one input channel + // for normal, must loop over each input channel + for (unsigned int cInput = 0; cInput < (depthwise ? 
1 : channelsInput); cInput++) + { + unsigned int depthwiseMultiplierIdx = 0; + if (depthwise) + { + cInput = cOutput / depthMult; + depthwiseMultiplierIdx = cOutput % depthMult; + } + + for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++) + { + for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) + { + // this loop goes over each input element for each output element + + unsigned int filterIndex; + + // since dimensionality of kernel depends on depthwiseness, so does index + if (depthwise) + { + filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput + + cInput * widthFilter * heightFilter + + yFilter * widthFilter + + xFilter; + } + else + { + filterIndex = cOutput * widthFilter * heightFilter * channelsInput + + cInput * widthFilter * heightFilter + + yFilter * widthFilter + + xFilter; + } + AccumulatorType filterValue = filterData[filterIndex] - + boost::numeric_cast<AccumulatorType>(filterOffset); + + unsigned int yInput = yOutput * hStride + yFilter; + unsigned int xInput = xOutput * xStride + xFilter; + + AccumulatorType inputValue; + + // check if we're in the padding + if (yInput < paddingTop || yInput >= heightInput + paddingTop || + xInput < paddingLeft || xInput >= widthInput + paddingLeft ) + { + inputValue = AccumulatorType(); + } + else + { + inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput + + widthInput * heightInput * cInput + + widthInput * (yInput - paddingTop) + + xInput - paddingLeft] - + boost::numeric_cast<AccumulatorType>(inputOffset); + } + sum += filterValue * inputValue; + } + } + } + + if (data.m_Parameters.m_BiasEnabled) + { + sum += biasData[cOutput]; + } + + if (outputScale != 0.0f) + { + float multiplier = (inputScale * filterScale) / outputScale; + // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent + // with the AndroidNN CPU implementation. This should be (roughly) equivalent to: + // sum = std::round(multiplier * sum + outputOffset); + sum = boost::numeric_cast<AccumulatorType>( + QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum)) + + boost::numeric_cast<AccumulatorType>(outputOffset); + sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255); + } + + outputData[batchIdx * widthOutput * heightOutput * channelsOutput + + widthOutput * heightOutput * cOutput + + widthOutput * yOutput + + xOutput] = boost::numeric_cast<InputType>(sum); + } + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.cpp b/src/armnn/backends/RefWorkloads/FullyConnected.cpp new file mode 100644 index 0000000000..8ba11d19c6 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/FullyConnected.cpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
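To make the requantisation at the end of ConvImpl above concrete, here is the arithmetic for one invented set of quantisation parameters; the float instantiations pass scales of 0.0f, so they skip this branch entirely.

// Illustrative values, not taken from the patch:
//   inputScale = 0.5f, filterScale = 0.25f, outputScale = 0.25f, outputOffset = 10
//   multiplier = (0.5 * 0.25) / 0.25 = 0.5             (must stay below 1.0f for the helper)
//   with an int32 accumulator sum = 200:
//     sum = QuantizedMultiplierSmallerThanOne(0.5f) * 200 + 10 = 100 + 10 = 110
//     sum = clamp(sum, 0, 255) = 110                   (then cast to the uint8 output)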
+// + +#include "FullyConnected.hpp" + +#include <boost/assert.hpp> + +namespace armnn +{ + +void FullyConnected(const float* inputData, + float* outputData, + const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, + const float* weightData, + const float* biasData, + bool transposeWeights) +{ + unsigned int N = outputTensorInfo.GetShape()[1]; // Output Vector Size + + BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Need some data + + unsigned int K = 1; // Total number of activations in the input + for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++) + { + K *= inputTensorInfo.GetShape()[i]; + } + + for (unsigned int n = 0; n < inputTensorInfo.GetShape()[0]; n++) + { + for (unsigned int channelOutput = 0; channelOutput < N; channelOutput++) + { + float outval = 0.f; + + for (unsigned int channelInput = 0; channelInput < K; channelInput++) + { + float weight; + if (transposeWeights) + { + weight = weightData[channelOutput * K + channelInput]; + } + else + { + weight = weightData[channelInput * N + channelOutput]; + } + + outval += weight * inputData[n * K + channelInput]; + } + + if (biasData) + { + outval += biasData[channelOutput]; + } + + outputData[n * N + channelOutput] = outval; + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/FullyConnected.hpp b/src/armnn/backends/RefWorkloads/FullyConnected.hpp new file mode 100644 index 0000000000..9fa2456110 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/FullyConnected.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +/// Performs a matrix multiplication and optionally adds a bias +void FullyConnected(const float* inputData, + float* outputData, + const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, + const float* weightData, + const float* biasData, + bool transposeWeights); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp new file mode 100644 index 0000000000..9695e457e2 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Merger.hpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include "backends/WorkloadData.hpp" + +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +template <typename DataType> +void Merger(const MergerQueueDescriptor& data) +{ + const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + + for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index) + { + unsigned int indices[MaxNumOfTensorDimensions]; + + unsigned int indexRemainder = index; + unsigned int dimensionStride = outputInfo0.GetNumElements(); + + for (unsigned int i=0; i<outputInfo0.GetNumDimensions(); i++) + { + dimensionStride /= outputInfo0.GetShape()[i]; + indices[i] = indexRemainder / dimensionStride; // use integer division to round down + indexRemainder -= indices[i] * dimensionStride; + } + + for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx) + { + MergerQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; + + //split view extents are defined by the size of (the corresponding) input tensor + const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]); + + // check all dimensions to see if this element is inside the given input view + bool insideView = true; + for (unsigned int i=0; i<inputInfo.GetNumDimensions(); i++) + { + if (indices[i] < view.m_Origin[i]) + { + insideView = false; + } + if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i]) + { + insideView = false; + } + } + + if (insideView) + { + unsigned int inIndex = 0; + unsigned int dimensionStride = 1; + + for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;) + { + inIndex += dimensionStride * (indices[i] - view.m_Origin[i]); + dimensionStride *= inputInfo.GetShape()[i]; + } + + //we are within the view, copy input data to the output corresponding to this view + (GetOutputTensorData<DataType>(0, data))[index] = + (GetInputTensorData<DataType>(viewIdx, data))[inIndex]; + + //what should we do if input views overlap on the output tensor? + //we could error, take the average, or shm else... + //for now just stop after finding first view (input) that matches. + break; + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Multiplication.cpp b/src/armnn/backends/RefWorkloads/Multiplication.cpp new file mode 100644 index 0000000000..7f558d83c5 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Multiplication.cpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "Multiplication.hpp" + +namespace armnn +{ + +void Multiplication(const float* in0, + const float* in1, + unsigned int numElements, + float* out) +{ + for (unsigned int i = 0; i < numElements; ++i) + { + out[i] = in0[i] * in1[i]; + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Multiplication.hpp b/src/armnn/backends/RefWorkloads/Multiplication.hpp new file mode 100644 index 0000000000..d0b033e7ec --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Multiplication.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
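A small worked example of the Merger indexing above, with invented shapes:

// Concatenating two [1,1,2,2] tensors along W into a [1,1,2,4] output, with view
// origins {0,0,0,0} and {0,0,0,2}:
//   output index 7 decomposes into per-dimension indices (0,0,1,3), which lies inside
//   the second view only; (indices - origin) = (0,0,1,1) gives inIndex = 3, so
//   output[0][0][1][3] = input1[0][0][1][1].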
+// + +#pragma once + +namespace armnn +{ + +void Multiplication(const float* in0, + const float* in1, + unsigned int numElements, + float* out); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp new file mode 100644 index 0000000000..6d15d8a436 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp @@ -0,0 +1,241 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "Pooling2d.hpp" + +#include <armnn/Exceptions.hpp> +#include <armnn/Types.hpp> + +#include <boost/numeric/conversion/cast.hpp> + +#include <limits> +#include <algorithm> +#include <functional> + +namespace +{ + using PoolingAlgorithm = armnn::PoolingAlgorithm; + + float DefaultInitializer(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return std::numeric_limits<float>::lowest(); + } + case PoolingAlgorithm::Average: + case PoolingAlgorithm::L2: + { + return 0.0f; + } + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + using Accumulator = std::function<void(float & accu, float value)>; + + Accumulator GetAccumulator(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return [](float & accu, float value) { + if (value > accu) { + accu = value; + } + }; + } + + case PoolingAlgorithm::Average: + { + return [](float & accu, float value) { + accu += value; + }; + } + + case PoolingAlgorithm::L2: + { + return [](float & accu, float value) { + accu += (value*value); + }; + } + + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + using Executor = std::function<void(float & accumulated, float kernelSize)>; + + Executor GetExecutor(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return [](float & accumulated, float kernelSize) {}; + } + + case PoolingAlgorithm::Average: + { + return [](float & accumulated, float kernelSize) { + accumulated /= kernelSize; + }; + } + + case PoolingAlgorithm::L2: + { + return [](float & accumulated, float kernelSize) { + accumulated = sqrtf(accumulated / kernelSize); + }; + } + + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + bool OnPaddingOnly(int start, int end, int maxRange, int padding) + { + if (end <= 0 || start > (maxRange - padding)) + { + return true; + } + else + { + return false; + } + } + + + bool ClampRange(int & start, int & end, int maxRange) + { + if (start < 0 || end > maxRange) + { + start = std::min(std::max(start, 0), maxRange); + end = std::min(std::max(end, 0), maxRange); + return true; + } + else + { + return false; + } + } +} + +namespace armnn +{ + +void Pooling2d(const float* in, + float* out, + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const Pooling2dDescriptor& params) +{ + const int batchSize = boost::numeric_cast<int>(outputInfo.GetShape()[0]); + const int channels = boost::numeric_cast<int>(outputInfo.GetShape()[1]); + const int heightOutput = boost::numeric_cast<int>(outputInfo.GetShape()[2]); + const int widthOutput = boost::numeric_cast<int>(outputInfo.GetShape()[3]); + const int heightInput = boost::numeric_cast<int>(inputInfo.GetShape()[2]); + const int widthInput = boost::numeric_cast<int>(inputInfo.GetShape()[3]); + const int padLeft = boost::numeric_cast<int>(params.m_PadLeft); + 
const int padRight = boost::numeric_cast<int>(params.m_PadRight); + const int padTop = boost::numeric_cast<int>(params.m_PadTop); + const int padBottom = boost::numeric_cast<int>(params.m_PadBottom); + const int strideX = boost::numeric_cast<int>(params.m_StrideX); + const int strideY = boost::numeric_cast<int>(params.m_StrideY); + const int poolHeight = boost::numeric_cast<int>(params.m_PoolHeight); + const int poolWidth = boost::numeric_cast<int>(params.m_PoolWidth); + + float defaultInitializer = DefaultInitializer(params.m_PoolType); + + Accumulator accumulate = GetAccumulator(params.m_PoolType); + Executor execute = GetExecutor(params.m_PoolType); + + // Check supported padding methods outside the loop to simplify + // the inner loop + if (params.m_PaddingMethod != PaddingMethod::Exclude && + params.m_PaddingMethod != PaddingMethod::IgnoreValue) + { + throw armnn::InvalidArgumentException("Unsupported padding type"); + } + + for (int n = 0; n < batchSize; n++) + { + for (int c = 0; c < channels; c++) + { + for (int yOutput = 0; yOutput < heightOutput; yOutput++) + { + for (int xOutput = 0; xOutput < widthOutput; xOutput++) + { + int hstart = (yOutput * strideY) - padTop; + int wstart = (xOutput * strideX) - padLeft; + int hend = hstart + poolHeight; + int wend = wstart + poolWidth; + + // Clamp the pooling region inside the valid input area (which includes the padding). + // This is necessary because the final pooling in a row may overlap beyond the padding. + hend = std::min(hend, heightInput + padRight); + wend = std::min(wend, widthInput + padBottom); + + float result = defaultInitializer; + float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart)); + + // special case: when the pooling kernel is over a padding region and the padding + // size is larger or equal to the kernel and the kernel only covers + // padding and no real values, then we initialize the result as zero + // by convention. This is because we need to choose a value here and + // all values we have are padding, which we ignore. + if (OnPaddingOnly(hstart, hend, heightInput, padBottom) || + OnPaddingOnly(wstart, wend, widthInput, padRight)) + { + result = 0.0f; + } + + bool clamped = ClampRange(wstart, wend, widthInput); + clamped |= ClampRange(hstart, hend, heightInput); + + if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude) + { + // when we exclude the padding, it means we calculate with a smaller + // kernel size, so I change the divisor here + poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart)); + } + + for (auto yInput = hstart; yInput < hend; yInput++) + { + for (auto xInput = wstart; xInput < wend; xInput++) + { + float inval = in[n * widthInput * heightInput * channels + + c * widthInput * heightInput + + yInput * widthInput + + xInput]; + + accumulate(result, inval); + } + } + + execute(result, poolAreaSize); + + out[n * widthOutput * heightOutput * channels + + c * widthOutput * heightOutput + + yOutput * widthOutput + + xOutput] = result; + } + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.hpp b/src/armnn/backends/RefWorkloads/Pooling2d.hpp new file mode 100644 index 0000000000..f88b1a0a4e --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Pooling2d.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
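A worked example of the padding handling above, using invented parameters:

// 2x2 average pooling, stride 2, over a 3x3 input padded by one row/column on the
// bottom and right. For the last output element:
//   hstart = wstart = 2, hend = wend = 4, poolAreaSize = 4
//   ClampRange() shrinks both ranges to [2,3), so only input[2][2] is accumulated.
//   PaddingMethod::IgnoreValue : result = input[2][2] / 4   (padding counted as zeros)
//   PaddingMethod::Exclude     : result = input[2][2] / 1   (divisor recomputed above)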
+// + +#pragma once + +#include <armnn/Descriptors.hpp> +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +/// Computes the Pooling2d operation +void Pooling2d(const float* in, + float* out, + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const Pooling2dDescriptor& params); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp new file mode 100644 index 0000000000..f566759deb --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.cpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefActivationFloat32Workload.hpp" + +#include "Activation.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefActivationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationFloat32Workload_Execute"); + + Activation(GetInputTensorDataFloat(0, m_Data), + GetOutputTensorDataFloat(0, m_Data), + GetTensorInfo(m_Data.m_Inputs[0]), + m_Data.m_Parameters.m_Function, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp new file mode 100644 index 0000000000..d8bd216699 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefActivationFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" + +namespace armnn +{ + +class RefActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor> +{ +public: + using Float32Workload<ActivationQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp new file mode 100644 index 0000000000..e8852f4bf8 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefActivationUint8Workload.hpp" + +#include "Activation.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationUint8Workload_Execute"); + + const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); + + std::vector<float> results(tensorInfo.GetNumElements()); + + Activation(dequant.data(), + results.data(), + tensorInfo, + m_Data.m_Parameters.m_Function, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp new file mode 100644 index 0000000000..51514d0646 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefActivationUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> +{ +public: + using Uint8Workload<ActivationQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp new file mode 100644 index 0000000000..e06d7f9295 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefAdditionFloat32Workload.hpp" + +#include "Addition.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefAdditionFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefAdditionFloat32Workload_Execute"); + + const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape(); + const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape(); + const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape(); + + const float* inData0 = GetInputTensorDataFloat(0, m_Data); + const float* inData1 = GetInputTensorDataFloat(1, m_Data); + float* outData = GetOutputTensorDataFloat(0, m_Data); + + Addition(inShape0, inShape1, outShape, inData0, inData1, outData); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp new file mode 100644 index 0000000000..e69ea28b28 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefAdditionFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor> +{ +public: + using Float32Workload<AdditionQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp new file mode 100644 index 0000000000..fa2dfeefc0 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefAdditionUint8Workload.hpp" + +#include "Addition.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefAdditionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefAdditionUint8Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant0 = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); + auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1); + + std::vector<float> results(outputInfo.GetNumElements()); + + Addition(inputInfo0.GetShape(), + inputInfo1.GetShape(), + outputInfo.GetShape(), + dequant0.data(), + dequant1.data(), + results.data()); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp new file mode 100644 index 0000000000..0f5a23ef4d --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefAdditionUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefAdditionUint8Workload : public Uint8Workload<AdditionQueueDescriptor> +{ +public: + using Uint8Workload<AdditionQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp new file mode 100644 index 0000000000..50a198f011 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefBaseConstantWorkload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include <armnn/Types.hpp> + +#include <boost/assert.hpp> + +#include <cstring> + +namespace armnn +{ + +template <armnn::DataType DataType> +void RefBaseConstantWorkload<DataType>::Execute() const +{ + // Considering the reference backend independently, it could be possible to initialise the intermediate tensor + // created by the layer output handler at workload construction time, rather than at workload execution time. + // However, this is not an option for other backends (e.g. CL). For consistency, we prefer to align all + // implementations. 
+ // A similar argument can be made about performing the memory copy in the first place (the layer output handler + // could have a non-owning reference to the layer output tensor managed by the const input layer); again, this is + // not an option for other backends, and the extra complexity required to make this work for the reference backend + // may not be worth the effort (skipping a memory copy in the first inference). + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); + BOOST_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes()); + + memcpy(GetOutputTensorData<void>(0, data), data.m_LayerOutput->GetConstTensor<void>(), + outputInfo.GetNumBytes()); + + m_RanOnce = true; + } +} + +template class RefBaseConstantWorkload<DataType::Float32>; +template class RefBaseConstantWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp new file mode 100644 index 0000000000..0ede46d9fb --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefBaseConstantWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include <armnn/Types.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types +template <armnn::DataType DataType> +class RefBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType> +{ +public: + RefBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..c421b0f212 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefBatchNormalizationFloat32Workload.hpp" + +#include "BatchNormImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefBatchNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute"); + + const float* var = m_Data.m_Variance->GetConstTensor<float>(); + const float* mean = m_Data.m_Mean->GetConstTensor<float>(); + const float* gamma = m_Data.m_Gamma->GetConstTensor<float>(); + const float* beta = m_Data.m_Beta->GetConstTensor<float>(); + + auto inputData = GetInputTensorDataFloat(0, m_Data); + auto outputData = GetOutputTensorDataFloat(0, m_Data); + + BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..cbcdadd749 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor> +{ +public: + using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp new file mode 100644 index 0000000000..8a48523765 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefBatchNormalizationUint8Workload.hpp" + +#include "BatchNormImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefBatchNormalizationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& varInfo = GetTensorInfo(m_Data.m_Variance); + const TensorInfo& meanInfo = GetTensorInfo(m_Data.m_Mean); + const TensorInfo& gammaInfo = GetTensorInfo(m_Data.m_Gamma); + const TensorInfo& betaInfo = GetTensorInfo(m_Data.m_Beta); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); + auto var = Dequantize(m_Data.m_Variance->GetConstTensor<uint8_t>(), varInfo); + auto mean = Dequantize(m_Data.m_Mean->GetConstTensor<uint8_t>(), meanInfo); + auto gamma = Dequantize(m_Data.m_Gamma->GetConstTensor<uint8_t>(), gammaInfo); + auto beta = Dequantize(m_Data.m_Beta->GetConstTensor<uint8_t>(), betaInfo); + + std::vector<float> results(outputInfo.GetNumElements()); + BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data()); + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp new file mode 100644 index 0000000000..57fe995ba5 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefBatchNormalizationUint8Workload : public Uint8Workload<BatchNormalizationQueueDescriptor> +{ +public: + using Uint8Workload<BatchNormalizationQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp new file mode 100644 index 0000000000..0ed66013f6 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefConstantFloat32Workload.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConstantFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantFloat32Workload_Execute"); + RefBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp new file mode 100644 index 0000000000..f0876a99bf --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConstantFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "RefBaseConstantWorkload.hpp" + +namespace armnn +{ + +class RefConstantFloat32Workload : public RefBaseConstantWorkload<DataType::Float32> +{ +public: + using RefBaseConstantWorkload<DataType::Float32>::RefBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp new file mode 100644 index 0000000000..2a4a514ad8 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefConstantUint8Workload.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantUint8Workload_Execute"); + RefBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp new file mode 100644 index 0000000000..504737dade --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "RefBaseConstantWorkload.hpp" + +namespace armnn +{ + +class RefConstantUint8Workload : public RefBaseConstantWorkload<DataType::QuantisedAsymm8> +{ +public: + using RefBaseConstantWorkload<DataType::QuantisedAsymm8>::RefBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..6e4cc69063 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefConvolution2dFloat32Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute"); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Data.m_Weight->template GetConstTensor<float>(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Data.m_Bias->template GetConstTensor<float>() : nullptr; + + ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>( + m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..514369c262 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor> +{ +public: + using Float32Workload<Convolution2dQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..f390baa387 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefConvolution2dUint8Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute"); + + const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor<uint8_t>(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Data.m_Bias->template GetConstTensor<int32_t>() : + nullptr; + uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + ConvImpl<armnn::Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>( + m_Data, + inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), + weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), + biasData, + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset()); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..954a206463 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor> +{ +public: + using Uint8Workload<Convolution2dQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..c631fecb66 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefDepthwiseConvolution2dFloat32Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefDepthwiseConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute"); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Data.m_Weight->template GetConstTensor<float>(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Data.m_Bias->template GetConstTensor<float>() : nullptr; + + ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float> + (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, true); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..34e6524684 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + using Float32Workload<DepthwiseConvolution2dQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..5a8fb13112 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefDepthwiseConvolution2dUint8Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefDepthwiseConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute"); + + const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const uint8_t* weightsData = m_Data.m_Weight->template GetConstTensor<uint8_t>(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Data.m_Weight); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
+ m_Data.m_Bias->template GetConstTensor<int32_t>() : + nullptr; + uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, uint8_t, int32_t, int32_t>( + m_Data, + inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), + weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), + biasData, + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), true); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..bd9945f529 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + using Uint8Workload<DepthwiseConvolution2dQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp new file mode 100644 index 0000000000..483fa7e00e --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefFakeQuantizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <boost/numeric/conversion/cast.hpp> + +namespace armnn +{ + +void FakeQuantization(const float* inputData, float* outputData, uint32_t numElements, float min, float max) +{ + float scale = (max - min) / 255.f; + int32_t offset = boost::numeric_cast<int32_t>((-min * 255.f) / (max - min)); + + for (uint32_t i = 0; i < numElements; i++) + { + outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset)); + } + +} + +void RefFakeQuantizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + const float* inputData = GetInputTensorDataFloat(0, m_Data); + float* outputData = GetOutputTensorDataFloat(0, m_Data); + FakeQuantization(inputData, outputData, inputInfo.GetNumElements(), + m_Data.m_Parameters.m_Min, + m_Data.m_Parameters.m_Max); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp new file mode 100644 index 0000000000..7ad5272edb --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
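A worked example of the FakeQuantization parameters above, with invented min/max and assuming armnn::Quantize rounds to nearest and clamps to the uint8 range:

// m_Min = 0.0f, m_Max = 255.0f  ->  scale = (255 - 0) / 255 = 1.0f, offset = 0
// so each value is snapped to the nearest uint8 code and returned as a float:
//   1.3f -> 1.0f,   77.7f -> 78.0f,   300.0f -> 255.0f (clamped)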
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFakeQuantizationFloat32Workload : public Float32Workload<FakeQuantizationQueueDescriptor> +{ +public: + using Float32Workload<FakeQuantizationQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp new file mode 100644 index 0000000000..4bc7ec4404 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefFloorFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefFloorFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute"); + + const float* const input = GetInputTensorDataFloat(0, m_Data); + float* const output = GetOutputTensorDataFloat(0, m_Data); + + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + for (unsigned int i = 0; i < numElements; ++i) + { + output[i] = floorf(input[i]); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp new file mode 100644 index 0000000000..1eb5e2ff7b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFloorFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor> +{ +public: + using Float32Workload<FloorQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp new file mode 100644 index 0000000000..6fe203e5f0 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefFullyConnectedFloat32Workload.hpp" + +#include "FullyConnected.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefFullyConnectedFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Data.m_Weight->GetConstTensor<float>(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
m_Data.m_Bias->GetConstTensor<float>() : nullptr; + + FullyConnected(inputData, + outputData, + inputInfo, + outputInfo, + weightData, + biasData, + m_Data.m_Parameters.m_TransposeWeightMatrix); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp new file mode 100644 index 0000000000..cb835bd2ce --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor> +{ +public: + using Float32Workload<FullyConnectedQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp new file mode 100644 index 0000000000..0186d3f5e5 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefFullyConnectedUint8Workload.hpp" + +#include "FullyConnected.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefFullyConnectedUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + const uint8_t* weightData = m_Data.m_Weight->GetConstTensor<uint8_t>(); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + auto weight = Dequantize(weightData, m_Data.m_Weight->GetTensorInfo()); + + std::vector<float> results(inputInfo.GetNumElements()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + const int32_t* biasData = m_Data.m_Bias->GetConstTensor<int32_t>(); + auto bias = Dequantize(biasData, m_Data.m_Bias->GetTensorInfo()); + + FullyConnected(dequant.data(), + results.data(), + inputInfo, + outputInfo, + weight.data(), + bias.data(), + m_Data.m_Parameters.m_TransposeWeightMatrix); + } + else + { + FullyConnected(dequant.data(), + results.data(), + inputInfo, + outputInfo, + weight.data(), + nullptr, + m_Data.m_Parameters.m_TransposeWeightMatrix); + } + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp new file mode 100644 index 0000000000..cd14ea85e0 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFullyConnectedUint8Workload : public Uint8Workload<FullyConnectedQueueDescriptor> +{ +public: + using Uint8Workload<FullyConnectedQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..82c1ecd32e --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp @@ -0,0 +1,61 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefL2NormalizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "TensorBufferArrayView.hpp" + +#include "Profiling.hpp" + +#include <cmath> + +namespace armnn +{ + +void RefL2NormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + TensorBufferArrayView<const float> input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data)); + TensorBufferArrayView<float> output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data)); + + const unsigned int batchSize = inputInfo.GetShape()[0]; + const unsigned int depth = inputInfo.GetShape()[1]; + const unsigned int rows = inputInfo.GetShape()[2]; + const unsigned int cols = inputInfo.GetShape()[3]; + + for (unsigned int n = 0; n < batchSize; ++n) + { + for (unsigned int d = 0; d < depth; ++d) + { + for (unsigned int h = 0; h < rows; ++h) + { + for (unsigned int w = 0; w < cols; ++w) + { + float reduction = 0.0; + for (unsigned int c = 0; c < depth; ++c) + { + const float value = input.Get(n, c, h, w); + reduction += value * value; + } + + // Using std::max(reduction, epsilon) below would prevent against division by 0. + // However, at the time of writing: + // - This is not supported by the ACL functions used to implement L2Normalization in the CL + // backend. + // - The reference semantics for this operator do not include this parameter. + const float scale = 1.0f / sqrtf(reduction); + output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale; + } + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..a2420279f5 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
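A quick numeric check of the L2 normalisation above, with invented values:

// The reduction runs across the channel dimension for each (n, h, w) position.
// With two channels holding 3.0f and 4.0f at the same position:
//   reduction = 9 + 16 = 25,  scale = 1 / sqrt(25) = 0.2
//   outputs   = 3.0f * 0.2 = 0.6f  and  4.0f * 0.2 = 0.8f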
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor> +{ +public: + using Float32Workload<L2NormalizationQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp new file mode 100644 index 0000000000..41d3c05d4b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefMergerFloat32Workload.hpp" + +#include "Merger.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMergerFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerFloat32Workload_Execute"); + Merger<float>(m_Data); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp new file mode 100644 index 0000000000..d894c2a2ca --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMergerFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMergerFloat32Workload : public Float32Workload<MergerQueueDescriptor> +{ +public: + using Float32Workload<MergerQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp new file mode 100644 index 0000000000..3f4371b628 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefMergerUint8Workload.hpp" + +#include "Merger.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerUint8Workload_Execute"); + Merger<uint8_t>(m_Data); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp new file mode 100644 index 0000000000..4c9bbcac50 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMergerUint8Workload : public Uint8Workload<MergerQueueDescriptor> +{ +public: + using Uint8Workload<MergerQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp new file mode 100644 index 0000000000..ed68b1f6db --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefMultiplicationFloat32Workload.hpp" + +#include "Multiplication.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMultiplicationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationFloat32Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData0 = GetInputTensorDataFloat(0, m_Data); + const float* inputData1 = GetInputTensorDataFloat(1, m_Data); + Multiplication(inputData0, inputData1, inputInfo0.GetNumElements(), outputData); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp new file mode 100644 index 0000000000..920d072836 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor> +{ +public: + using Float32Workload<MultiplicationQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp new file mode 100644 index 0000000000..2e6f0e6c8b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefMultiplicationUint8Workload.hpp" + +#include "Multiplication.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefMultiplicationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationUint8Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant0 = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); + auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1); + + std::vector<float> results(outputInfo.GetNumElements()); + Multiplication(dequant0.data(), + dequant1.data(), + inputInfo0.GetNumElements(), + results.data()); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp new file mode 100644 index 0000000000..5da2e581eb --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMultiplicationUint8Workload : public Uint8Workload<MultiplicationQueueDescriptor> +{ +public: + using Uint8Workload<MultiplicationQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..c743207423 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp @@ -0,0 +1,185 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefNormalizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <armnn/Tensor.hpp> + +#include <boost/log/trivial.hpp> +#include <boost/numeric/conversion/cast.hpp> + +namespace armnn +{ + +// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization +static void NormalizeWithinUingLbr(const float* inputData, + float* outputData, + const TensorShape& tensorShape, + uint32_t norm_size, + float alpha, + float beta, + float kappa) +{ + const unsigned int batchSize = tensorShape[0]; + const unsigned int depth = tensorShape[1]; + const unsigned int rows = tensorShape[2]; + const unsigned int cols = tensorShape[3]; + + int radius = boost::numeric_cast<int>(norm_size / 2u); /* Strong Assumption on rounding Mode */ + + for (unsigned int n = 0; n < batchSize; n++) + { + for (unsigned int c = 0; c < depth; c++) + { + for (unsigned int h = 0; h < rows; h++) + { + for (unsigned int w = 0; w < cols; w++) + { + float accumulated_scale = 0.0; + for (int y = -radius; y <= radius; y++) + { + for (int x = -radius; x <= radius; x++) + { + int i = boost::numeric_cast<int>(w) + x; + int j = boost::numeric_cast<int>(h) + y; + + if ((i < 0) || (i >= boost::numeric_cast<int>(cols))) + { + continue; + } + + if ((j < 0) || (j >= boost::numeric_cast<int>(rows))) + { + continue; + } + + float inval = inputData[n * cols * rows * depth + + c * cols * rows + + boost::numeric_cast<unsigned int>(j) * cols + + boost::numeric_cast<unsigned int>(i)]; + + accumulated_scale += inval*inval; + } + } + outputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] = inputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] / (powf((kappa + (accumulated_scale * alpha)), beta)); + } + } + } + } +} + +// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization +void NormalizeAcrossUingLbr(const float* inputData, + float* outputData, + const TensorShape& tensorShape, + uint32_t norm_size, + float alpha, + float beta, + float kappa) +{ + const unsigned int batchSize = tensorShape[0]; + const unsigned int depth = tensorShape[1]; + const unsigned int rows = tensorShape[2]; + const unsigned int cols = tensorShape[3]; + + int radius = boost::numeric_cast<int>(norm_size / 2u); /* Strong Assumption on rounding Mode */ + + for (unsigned int n = 0; n < batchSize; n++) + { + for (unsigned int c = 0; c < depth; c++) + { + for (unsigned int h = 0; h < rows; h++) + { + for (unsigned int w = 0; w < cols; w++) + { + float accumulated_scale = 0.0; + for (int z = -radius; z <= radius; z++) + { + int k = boost::numeric_cast<int>(c) + z; + + if ((k < 0) || (k >= boost::numeric_cast<int>(depth))) + { + continue; + } + + float inval = inputData[n * cols * rows * depth + + boost::numeric_cast<unsigned int>(k) * cols * rows + + h * cols + + w]; + + accumulated_scale += inval*inval; + } + float scale = kappa + (accumulated_scale * alpha); + scale = powf(scale, -beta); + outputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] = scale * + inputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w]; + } + } + } + } +} + +void RefNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = 
GetInputTensorDataFloat(0, m_Data); + + + if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType) + { + if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType) + { + NormalizeWithinUingLbr(inputData, + outputData, + inputInfo.GetShape(), + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K); + } + else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType) + { + NormalizeAcrossUingLbr(inputData, + outputData, + inputInfo.GetShape(), + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K); + } + else + { + BOOST_LOG_TRIVIAL(warning) << "Illegal NORMALIZATION mode in normalization_f32"; + return; + } + } + else + { + BOOST_LOG_TRIVIAL(warning) << "Lcr method (Jarret 2009: Local Contrast Normalization) not supported yet."; + return; + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..6f4175ae35 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor> +{ +public: + using Float32Workload<NormalizationQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp new file mode 100644 index 0000000000..b2bb8fbf3d --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefPermuteWorkload.hpp" +#include "RefWorkloadUtils.hpp" + +#include <Permute.hpp> + +namespace armnn +{ + +template <armnn::DataType DataType> +void RefPermuteWorkload<DataType>::Execute() const +{ + using T = ResolveType<DataType>; + + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); + + const ITensorHandle* src = m_Data.m_Inputs[0]; + const ITensorHandle* dst = m_Data.m_Outputs[0]; + const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst)); +} + +template class RefPermuteWorkload<DataType::Float32>; +template class RefPermuteWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp new file mode 100644 index 0000000000..4ca1f38588 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefPermuteWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" + +#include <armnn/TypesUtils.hpp> + +namespace armnn +{ + +template <armnn::DataType DataType> +class RefPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType> +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("RefPermute") + GetDataTypeName(DataType) + "Workload"; + return name; + } + + using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data; + using TypedWorkload<PermuteQueueDescriptor, DataType>::TypedWorkload; + void Execute() const override; +}; + +using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>; +using RefPermuteUint8Workload = RefPermuteWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp new file mode 100644 index 0000000000..030f96c892 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefPooling2dFloat32Workload.hpp" + +#include "Pooling2d.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefPooling2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dFloat32Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(m_Data.m_Outputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + + Pooling2d(inputData, + outputData, + inputInfo0, + outputInfo0, + m_Data.m_Parameters); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp new file mode 100644 index 0000000000..598b365a17 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefPooling2dFloat32Workload : public Float32Workload<Pooling2dQueueDescriptor> +{ +public: + using Float32Workload<Pooling2dQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..7066fc910b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefPooling2dUint8Workload.hpp" + +#include "Pooling2d.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + std::vector<float> results(outputInfo.GetNumElements()); + Pooling2d(dequant.data(), + results.data(), + inputInfo, + outputInfo, + m_Data.m_Parameters); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..cbeca2c41d --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefPooling2dUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefPooling2dUint8Workload : public Uint8Workload<Pooling2dQueueDescriptor> +{ +public: + using Uint8Workload<Pooling2dQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp new file mode 100644 index 0000000000..3bf7b48622 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefReshapeFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <cstring> + +namespace armnn +{ + +void RefReshapeFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeFloat32Workload_Execute"); + + void* output = GetOutputTensorData<void>(0, m_Data); + const void* input = GetInputTensorData<void>(0, m_Data); + unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + memcpy(output, input, numBytes); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp new file mode 100644 index 0000000000..36fdf7f812 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefReshapeFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor> +{ +public: + using Float32Workload<ReshapeQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp new file mode 100644 index 0000000000..38742607cd --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefReshapeUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include <cstring> + +namespace armnn +{ + +void RefReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeUint8Workload_Execute"); + + void* output = GetOutputTensorData<void>(0, m_Data); + const void* input = GetInputTensorData<void>(0, m_Data); + unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + memcpy(output, input, numBytes); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp new file mode 100644 index 0000000000..38da277bd2 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefReshapeUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> +{ +public: + using Uint8Workload<ReshapeQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp new file mode 100644 index 0000000000..8ad7a76298 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefResizeBilinearFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "ResizeBilinear.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefResizeBilinearFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + ResizeBilinear(GetInputTensorDataFloat(0, m_Data), + inputInfo, + GetOutputTensorDataFloat(0, m_Data), + outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp new file mode 100644 index 0000000000..86e8693b91 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor> +{ +public: + using Float32Workload<ResizeBilinearQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp new file mode 100644 index 0000000000..dfa561db6d --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefResizeBilinearUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "ResizeBilinear.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefResizeBilinearUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + std::vector<float> results(outputInfo.GetNumElements()); + ResizeBilinear(dequant.data(), inputInfo, results.data(), outputInfo); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp new file mode 100644 index 0000000000..f72fafda4f --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefResizeBilinearUint8Workload : public Uint8Workload<ResizeBilinearQueueDescriptor> +{ +public: + using Uint8Workload<ResizeBilinearQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp new file mode 100644 index 0000000000..590e514d3d --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefSoftmaxFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "Softmax.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSoftmaxFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxFloat32Workload_Execute"); + + Softmax(GetInputTensorDataFloat(0, m_Data), + GetOutputTensorDataFloat(0, m_Data), + GetTensorInfo(m_Data.m_Inputs[0]), + m_Data.m_Parameters.m_Beta); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp new file mode 100644 index 0000000000..4d30f9fa3f --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor> +{ +public: + using Float32Workload<SoftmaxQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..5ef4a6da92 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefSoftmaxUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "Softmax.hpp" + +#include "Profiling.hpp" + +#include <vector> + +namespace armnn +{ + +void RefSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxUint8Workload_Execute"); + + const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); + + std::vector<float> results(tensorInfo.GetNumElements()); + + Softmax(dequant.data(), + results.data(), + tensorInfo, + m_Data.m_Parameters.m_Beta); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..fadc764e0a --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> +{ +public: + using Uint8Workload<SoftmaxQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp new file mode 100644 index 0000000000..35ab4e22ef --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefSplitterFloat32Workload.hpp" + +#include "Splitter.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSplitterFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterFloat32Workload_Execute"); + Splitter<float>(m_Data); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp new file mode 100644 index 0000000000..722dde129c --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSplitterFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSplitterFloat32Workload : public Float32Workload<SplitterQueueDescriptor> +{ +public: + using Float32Workload<SplitterQueueDescriptor>::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp new file mode 100644 index 0000000000..522a4463dd --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "RefSplitterUint8Workload.hpp" + +#include "Splitter.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterUint8Workload_Execute"); + Splitter<uint8_t>(m_Data); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp new file mode 100644 index 0000000000..e28554951b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefSplitterUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSplitterUint8Workload : public Uint8Workload<SplitterQueueDescriptor> +{ +public: + using Uint8Workload<SplitterQueueDescriptor>::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp new file mode 100644 index 0000000000..088fe819e5 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefWorkloadUtils.hpp @@ -0,0 +1,125 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/CpuTensorHandle.hpp" + +#include <armnn/Tensor.hpp> +#include <armnn/Types.hpp> + +#include <boost/polymorphic_cast.hpp> + +namespace armnn +{ + +//////////////////////////////////////////// +/// float32 helpers +//////////////////////////////////////////// + +inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. 
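+    // (boost::polymorphic_downcast is a checked static_cast: debug builds assert that the dynamic type
+    // really is the target type, while release builds avoid the RTTI cost of a dynamic_cast.)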
+ const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle); + return cpuTensorHandle->GetTensorInfo(); +} + +template <typename DataType> +inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle); + return cpuTensorHandle->GetConstTensor<DataType>(); +} + +template <typename DataType> +inline DataType* GetCpuData(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. + const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle); + return cpuTensorHandle->GetTensor<DataType>(); +}; + +template <typename DataType, typename PayloadType> +const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Inputs[idx]; + return GetConstCpuData<DataType>(tensorHandle); +} + +template <typename DataType, typename PayloadType> +DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return GetCpuData<DataType>(tensorHandle); +} + +template <typename PayloadType> +const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data) +{ + return GetInputTensorData<float>(idx, data); +} + +template <typename PayloadType> +float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data) +{ + return GetOutputTensorData<float>(idx, data); +} + +//////////////////////////////////////////// +/// u8 helpers +//////////////////////////////////////////// + +inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle); + return cpuTensorHandle->GetConstTensor<uint8_t>(); +}; + +inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. 
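+    // Note: the non-const CpuTensorHandle is needed here because the returned pointer is written to
+    // when producing output data.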
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle); + return cpuTensorHandle->GetTensor<uint8_t>(); +}; + +template <typename PayloadType> +const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Inputs[idx]; + return GetConstCpuU8Data(tensorHandle); +} + +template <typename PayloadType> +uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return GetCpuU8Data(tensorHandle); +} + +template<typename T> +std::vector<float> Dequantize(const T* quant, const TensorInfo& info) +{ + std::vector<float> ret(info.GetNumElements()); + for (size_t i = 0; i < info.GetNumElements(); i++) + { + ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } + return ret; +} + +inline void Quantize(uint8_t* quant, const float* dequant, const TensorInfo& info) +{ + for (size_t i = 0; i < info.GetNumElements(); i++) + { + quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp new file mode 100644 index 0000000000..7b386ed467 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/ResizeBilinear.cpp @@ -0,0 +1,92 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "ResizeBilinear.hpp" + +#include "TensorBufferArrayView.hpp" + +#include <boost/numeric/conversion/cast.hpp> + +#include <cmath> +#include <algorithm> + +namespace armnn +{ + +namespace +{ + +inline float Lerp(float a, float b, float w) +{ + return w * b + (1.f - w) * a; +} + +} + +void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo) +{ + // We follow the definition of TensorFlow and AndroidNN: The top-left corner of a texel in the output + // image is projected into the input image to figure out the interpolants and weights. Note that this + // will yield different results than if projecting the centre of output texels. 
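+    // For example, resizing a 4-pixel-wide row down to 2 pixels gives scaleX = 2, so output column 1
+    // samples the input at x = 2.0 (top-left aligned) rather than at x = 2.5 as centre-aligned sampling would.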
+ + const unsigned int batchSize = inputInfo.GetShape()[0]; + const unsigned int channelCount = inputInfo.GetShape()[1]; + + const unsigned int inputHeight = inputInfo.GetShape()[2]; + const unsigned int inputWidth = inputInfo.GetShape()[3]; + const unsigned int outputHeight = outputInfo.GetShape()[2]; + const unsigned int outputWidth = outputInfo.GetShape()[3]; + + // How much to scale pixel coordinates in the output image to get the corresponding pixel coordinates + // in the input image + const float scaleY = boost::numeric_cast<float>(inputHeight) / boost::numeric_cast<float>(outputHeight); + const float scaleX = boost::numeric_cast<float>(inputWidth) / boost::numeric_cast<float>(outputWidth); + + TensorBufferArrayView<const float> input(inputInfo.GetShape(), in); + TensorBufferArrayView<float> output(outputInfo.GetShape(), out); + + for (unsigned int n = 0; n < batchSize; ++n) + { + for (unsigned int c = 0; c < channelCount; ++c) + { + for (unsigned int y = 0; y < outputHeight; ++y) + { + // Corresponding real-valued height coordinate in input image + const float iy = boost::numeric_cast<float>(y) * scaleY; + + // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation) + const float fiy = floorf(iy); + const unsigned int y0 = boost::numeric_cast<unsigned int>(fiy); + + // Interpolation weight (range [0,1]) + const float yw = iy - fiy; + + for (unsigned int x = 0; x < outputWidth; ++x) + { + // Real-valued and discrete width coordinates in input image + const float ix = boost::numeric_cast<float>(x) * scaleX; + const float fix = floorf(ix); + const unsigned int x0 = boost::numeric_cast<unsigned int>(fix); + + // Interpolation weight (range [0,1]) + const float xw = ix - fix; + + // Discrete width/height coordinates of texels below and to the right of (x0, y0) + const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u); + const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u); + + // Interpolation + const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0 + const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1 + const float l = Lerp(ly0, ly1, yw); + + output.Get(n, c, y, x) = l; + } + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp b/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp new file mode 100644 index 0000000000..50e8128d18 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/ResizeBilinear.hpp @@ -0,0 +1,15 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Softmax.cpp b/src/armnn/backends/RefWorkloads/Softmax.cpp new file mode 100644 index 0000000000..58840e3076 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Softmax.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "Softmax.hpp" + +#include <cmath> +#include <vector> + +namespace armnn +{ + +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo +void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta) +{ + unsigned int numChannels = tensorInfo.GetShape()[1]; + for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++) + { + // find maximum channel + float max = in[n * numChannels]; + for (unsigned int c = 1; c < numChannels; c++) + { + float val = in[n * numChannels + c]; + if (val > max) + { + max = val; + } + } + + // exponentiate all values and sum + std::vector<float> exponentials(numChannels); + float sum = 0.0f; + for (unsigned int c = 0; c < numChannels; c++) + { + float val = in[n * numChannels + c]; + exponentials[c] = expf((val - max) * beta); + sum += exponentials[c]; + } + + // divide exponentials by sum to give outputs + for (unsigned int c = 0; c < numChannels; c++) + { + out[n * numChannels + c] = exponentials[c] / sum; + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Softmax.hpp b/src/armnn/backends/RefWorkloads/Softmax.hpp new file mode 100644 index 0000000000..c508ab2b82 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Softmax.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo +void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp new file mode 100644 index 0000000000..67f6c100f9 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Splitter.hpp @@ -0,0 +1,83 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include "backends/WorkloadData.hpp" + +#include <armnn/Tensor.hpp> + +#include <boost/assert.hpp> + +namespace armnn +{ + +template <typename DataType> +void Splitter(const SplitterQueueDescriptor& data) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + + for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index) + { + unsigned int indices[MaxNumOfTensorDimensions]; + + unsigned int indexRemainder = index; + unsigned int dimensionStride = inputInfo0.GetNumElements(); + + for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++) + { + dimensionStride /= inputInfo0.GetShape()[i]; + indices[i] = indexRemainder / dimensionStride; // use integer division to round down + indexRemainder -= indices[i] * dimensionStride; + } + + for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx) + { + SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx]; + + //split view extents are defined by the size of (the corresponding) input tensor + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]); + + // check all dimensions to see if this element is inside the given input view + bool insideView = true; + for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++) + { + if (indices[i] < view.m_Origin[i]) + { + insideView = false; + } + if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i]) + { + insideView = false; + } + } + + if (insideView) + { + unsigned int outIndex = 0; + unsigned int dimensionStride = 1; + + for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;) + { + outIndex += dimensionStride * (indices[i] - view.m_Origin[i]); + dimensionStride *= outputInfo.GetShape()[i]; + } + + //we are within the view, copy input data to the output corresponding to this view + DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data); + BOOST_ASSERT(outputData); + + const DataType* inputData = GetInputTensorData<DataType>(0, data); + BOOST_ASSERT(inputData); + + outputData[outIndex] = inputData[index]; + } + } + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp new file mode 100644 index 0000000000..3994c1f1de --- /dev/null +++ b/src/armnn/backends/RefWorkloads/TensorBufferArrayView.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include <armnn/Tensor.hpp> + +#include <boost/assert.hpp> + +namespace armnn +{ + +// Utility class providing access to raw tensor memory based on indices along each dimension +template <typename DataType> +class TensorBufferArrayView +{ +public: + TensorBufferArrayView(const TensorShape& shape, DataType* data) + : m_Shape(shape) + , m_Data(data) + { + } + + DataType& Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const + { + BOOST_ASSERT( b < m_Shape[0] || (m_Shape[0] == 0 && b == 0) ); + BOOST_ASSERT( c < m_Shape[1] || (m_Shape[1] == 0 && c == 0) ); + BOOST_ASSERT( h < m_Shape[2] || (m_Shape[2] == 0 && h == 0) ); + BOOST_ASSERT( w < m_Shape[3] || (m_Shape[3] == 0 && w == 0) ); + + return m_Data[b * m_Shape[1] * m_Shape[2] * m_Shape[3] + + c * m_Shape[2] * m_Shape[3] + + h * m_Shape[3] + + w]; + } + +private: + const TensorShape m_Shape; + DataType* m_Data; +}; + +} //namespace armnn diff --git a/src/armnn/backends/Workload.hpp b/src/armnn/backends/Workload.hpp new file mode 100644 index 0000000000..dbc7574d0e --- /dev/null +++ b/src/armnn/backends/Workload.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "WorkloadData.hpp" +#include "WorkloadInfo.hpp" +#include <algorithm> +#include "Profiling.hpp" + +namespace armnn +{ + +// Workload interface to enqueue a layer computation +class IWorkload +{ +public: + virtual ~IWorkload(){}; + + virtual void Execute() const = 0; +}; + +// NullWorkload used to denote an unsupported workload when used by the MakeWorkload<> template +// in the various workload factories. +// There should never be an instantiation of a NullWorkload. +class NullWorkload : public IWorkload +{ + NullWorkload()=delete; +}; + +template <typename QueueDescriptor> +class BaseWorkload : public IWorkload +{ +public: + + BaseWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) + : m_Data(descriptor) + { + m_Data.Validate(info); + } + + const QueueDescriptor& GetData() const { return m_Data; } + +protected: + const QueueDescriptor m_Data; +}; + +template <typename QueueDescriptor, armnn::DataType DataType> +class TypedWorkload : public BaseWorkload<QueueDescriptor> +{ +public: + + TypedWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload<QueueDescriptor>(descriptor, info) + { + BOOST_ASSERT_MSG(std::all_of(info.m_InputTensorInfos.begin(), + info.m_InputTensorInfos.end(), + [&](auto it){ + return it.GetDataType() == DataType; + }), + "Trying to create workload with incorrect type"); + BOOST_ASSERT_MSG(std::all_of(info.m_OutputTensorInfos.begin(), + info.m_OutputTensorInfos.end(), + [&](auto it){ + return it.GetDataType() == DataType; + }), + "Trying to create workload with incorrect type"); + } + + static constexpr armnn::DataType ms_DataType = DataType; +}; + +template <typename QueueDescriptor> +using Float32Workload = TypedWorkload<QueueDescriptor, armnn::DataType::Float32>; + +template <typename QueueDescriptor> +using Uint8Workload = TypedWorkload<QueueDescriptor, armnn::DataType::QuantisedAsymm8>; + +} //namespace armnn diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp new file mode 100644 index 0000000000..96a37802f1 --- /dev/null +++ b/src/armnn/backends/WorkloadData.cpp @@ -0,0 +1,753 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "WorkloadData.hpp" + +#include "CpuTensorHandle.hpp" +#include "WorkloadInfo.hpp" + +#include <algorithm> +#include <string> +#include <sstream> +#include <iomanip> + +#include <boost/format.hpp> + +namespace armnn +{ + +//--------------------------------------------------------------- +DataType GetBiasDataType(DataType inputDataType) +{ + switch (inputDataType) + { + case DataType::Float32: + return DataType::Float32; + case DataType::QuantisedAsymm8: + return DataType::Signed32; + default: + BOOST_ASSERT_MSG(false, "Invalid input data type"); + return DataType::Float32; + } +} + +namespace +{ + +//--------------------------------------------------------------- +//android ndk does not support std::to_string function. +template <typename T> +std::string to_string(T value) +{ + std::ostringstream os; + os << value; + return os.str(); +} + +//--------------------------------------------------------------- +void ValidatePointer(const void* ptr, std::string const& descName, std::string const& paramName) +{ + if (!ptr) + { + throw InvalidArgumentException(descName + ": Invalid null pointer. The " + + paramName + " parameter must be set."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorShapesMatch(const TensorInfo& first, + const TensorInfo& second, + std::string const& descName, + std::string const& firstName, + std::string const& secondName) +{ + if (first.GetShape() != second.GetShape()) + { + throw InvalidArgumentException(descName + ": " + + firstName + " & " + secondName + " must have identical shapes"); + } +} + +//--------------------------------------------------------------- +void ValidateNoInputs(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_InputTensorInfos.size() != 0) + { + throw InvalidArgumentException(descName + + ": Requires no inputs. " + + to_string(workloadInfo.m_InputTensorInfos.size()) + " has been provided."); + } +} + +//--------------------------------------------------------------- +void ValidateSingleInput(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_InputTensorInfos.size() != 1) + { + throw InvalidArgumentException(descName + + ": Requires exactly one input. " + + to_string(workloadInfo.m_InputTensorInfos.size()) + " has been provided." ); + } +} + +//--------------------------------------------------------------- +void ValidateTwoInputs(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_InputTensorInfos.size() != 2) + { + throw InvalidArgumentException(descName + + ": Requires exactly two workloadInfo.m_InputTensorInfos. " + + to_string(workloadInfo.m_InputTensorInfos.size()) + " have been provided."); + } +} + +//--------------------------------------------------------------- +void ValidateSingleOutput(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_OutputTensorInfos.size() != 1) + { + throw InvalidArgumentException(descName + + ": Requires exactly one output. 
" + + to_string(workloadInfo.m_OutputTensorInfos.size()) + " has been provided."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorNumDimensions(const TensorInfo& tensor, + std::string const& descName, + unsigned int numDimensions, + std::string const& tensorName) +{ + if (tensor.GetNumDimensions() != numDimensions) + { + throw InvalidArgumentException(descName + ": Expected " + to_string(numDimensions) + " but got " + + to_string(tensor.GetNumDimensions()) + " dimensions for " + + tensorName + " tensor."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorDataType(const TensorInfo& tensor, DataType dataType, + const std::string& descName, std::string const& tensorName) +{ + if (tensor.GetDataType() != dataType) + { + throw InvalidArgumentException(descName + ": Expected data type " + GetDataTypeName(dataType) + " but got " + + GetDataTypeName(tensor.GetDataType()) + " for " + tensorName + " tensor."); + } +} + +//--------------------------------------------------------------- +void ValidateBiasTensorQuantization(const TensorInfo& biasTensor, const TensorInfo& inputTensorInfo, + const TensorInfo& weightsTensorInfo, const std::string& descName) +{ + if (biasTensor.GetQuantizationOffset() != 0) + { + throw InvalidArgumentException(descName + ": Expected zero quantization offset for bias tensor but got " + + to_string(biasTensor.GetQuantizationOffset())); + } + const float expectedScale = inputTensorInfo.GetQuantizationScale() * weightsTensorInfo.GetQuantizationScale(); + if (biasTensor.GetQuantizationScale() != expectedScale) + { + // Print the float values with extra precision to see very small differences + std::stringstream msg; + msg << std::setprecision(10) << descName << ": Expected " << expectedScale << + " quantization scale for bias tensor (the product of the input and weight scales), but got " << + biasTensor.GetQuantizationScale(); + throw InvalidArgumentException(msg.str()); + } +} + +//--------------------------------------------------------------- +void ValidateTensors(const std::vector<ITensorHandle*>& vec, + unsigned int numExpected, + const std::string& descName, + const std::string& varName) +{ + if (vec.empty() && numExpected > 0) + { + throw InvalidArgumentException(descName + ": Invalid empty " + varName + " array."); + } + + for (unsigned int i = 0; i < numExpected; ++i) + { + if (!vec[i]) + { + throw InvalidArgumentException(descName + ": Invalid NULL for " + varName + to_string(i)); + } + } +} + +//--------------------------------------------------------------- +void ValidateBroadcastTensorShapesMatch(const TensorInfo& first, + const TensorInfo& second, + const TensorInfo& output, + std::string const& descName, + std::string const& firstName, + std::string const& secondName) +{ + // Tensors must have the same number of dimensions in order to be explicit about which dimensions will get + // broadcasted. 
+ if (first.GetNumDimensions() != second.GetNumDimensions()) + { + throw InvalidArgumentException(descName + ": Tensors " + + firstName + " & " + secondName + + " must have the same number of dimensions in order to be broadcasted"); + } + uint32_t numDims = first.GetNumDimensions(); + std::vector<uint32_t> outputDims(numDims, 0u); + for (uint32_t i = 0; i < numDims; i++) + { + const bool dimsNotEqual = first.GetShape()[i] != second.GetShape()[i]; + const bool dimsNotOne = (first.GetShape()[i] != 1) && (second.GetShape()[i] != 1); + if (dimsNotEqual && dimsNotOne) + { + throw InvalidArgumentException("Broadcasting is not possible for incompatible shapes"); + } + outputDims[i] = std::max(first.GetShape()[i], second.GetShape()[i]); + } + TensorShape broadcastShape = TensorShape(boost::numeric_cast<unsigned int>(outputDims.size()), outputDims.data()); + if (broadcastShape != output.GetShape()) + { + throw InvalidArgumentException(descName + ": The tensor shape resulting from adding " + + firstName + " & " + secondName + + " does not match the output shape"); + } +} + +//--------------------------------------------------------------- +/// Validates that the output tensor's quantization scale is greater than the product +/// of the two input tensors' quantization scales. This is a requirement of the implementation of +/// the quantized multiplication. +void ValidateTensorQuantizationMultiplier(const TensorInfo& inputTensor1, const TensorInfo& inputTensor2, + const TensorInfo& outputTensorInfo, std::string const& descName, + const std::string& inputTensor1Name, const std::string& inputTensor2Name, const std::string& outputTensorName) +{ + if (outputTensorInfo.GetDataType() == DataType::QuantisedAsymm8) + { + if (outputTensorInfo.GetQuantizationScale() <= + inputTensor1.GetQuantizationScale() * inputTensor2.GetQuantizationScale()) + { + std::stringstream msg; + msg << descName << ": Quantization scale of " << outputTensorName << " is not greater than " << + "the product of the " << inputTensor1Name << " and " << inputTensor2Name << " tensors"; + throw InvalidArgumentException(msg.str()); + } + } +} + +} //namespace + +void QueueDescriptor::ValidateInputsOutputs(const std::string& descName, + unsigned int numExpectedIn, unsigned int numExpectedOut) const +{ + ValidateTensors(m_Inputs, numExpectedIn, descName, "input"); + ValidateTensors(m_Outputs, numExpectedOut, descName, "output"); +} + +//--------------------------------------------------------------- +void MemCopyQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "MemCopyQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "MemCopyQueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos.size() != workloadInfo.m_OutputTensorInfos.size()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of input infos (%1%) does not match the number of output infos (%2%)") + % workloadInfo.m_InputTensorInfos.size() % workloadInfo.m_OutputTensorInfos.size())); + } + + for (std::size_t i = 0; i < workloadInfo.m_InputTensorInfos.size(); ++i) + { + if (workloadInfo.m_InputTensorInfos[i].GetNumElements() != + workloadInfo.m_OutputTensorInfos[i].GetNumElements()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of elements for tensor input and output %1% does not match") + % i )); + } + } + + if (m_Inputs.size() != m_Outputs.size()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of inputs (%1%) does not match the number of outputs (%2%)") 
+ % m_Inputs.size() % m_Outputs.size())); + } + + for (unsigned int i = 0; i < m_Inputs.size(); ++i) + { + if (!m_Inputs[i]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null input %1%") % i)); + } + + if (!m_Outputs[i]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null output %1%") % i)); + } + } +} + +//--------------------------------------------------------------- +void ActivationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ActivationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ActivationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "ActivationQueueDescriptor", + "input", + "output"); +} + +//--------------------------------------------------------------- +void SoftmaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "SoftmaxQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "SoftmaxQueueDescriptor"); + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "SoftmaxQueueDescriptor", 2, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "SoftmaxQueueDescriptor", 2, "output"); + + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "SoftmaxQueueDescriptor", + "input", + "output"); +} + +//--------------------------------------------------------------- +void SplitterQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "SplitterQueueDescriptor"); + + if (workloadInfo.m_OutputTensorInfos.size() <= 0) + { + throw InvalidArgumentException("SplitterQueueDescriptor: At least one output needs to be provided."); + } + + if (workloadInfo.m_OutputTensorInfos.size() != m_ViewOrigins.size()) + { + throw InvalidArgumentException( + "SplitterQueueDescriptor: Number of split windows " + "has to match number of workloadInfo.m_OutputTensorInfos. " + "Number of windows: " + + to_string(m_ViewOrigins.size()) + + ". Number of workloadInfo.m_OutputTensorInfos: " + to_string(workloadInfo.m_OutputTensorInfos.size())); + } + + //the dimensionality of all the windows has to match the dimensionality (not shape) of the input + std::size_t inputDims = workloadInfo.m_InputTensorInfos[0].GetNumDimensions(); + for(unsigned int w = 0; w < m_ViewOrigins.size(); ++w ) + { + //check that the dimensionality of input is same as the split windows + ViewOrigin const& e = m_ViewOrigins[w]; + if (e.m_Origin.size() != inputDims) + { + throw InvalidArgumentException("SplitterQueueDescriptor: Window origin have to " + "have the same dimensionality as the input tensor. 
" + "Window origin (index: " + + to_string(w) + ") has " + to_string(e.m_Origin.size()) + + " dimensions, the input " + "tensor has " + + to_string(inputDims) + " dimensions."); + } + for (unsigned int i = 0; i < e.m_Origin.size(); ++i) + { + if (e.m_Origin[i] + workloadInfo.m_OutputTensorInfos[w].GetShape()[i] > + workloadInfo.m_InputTensorInfos[0].GetShape()[i]) + { + throw InvalidArgumentException("SplitterQueueDescriptor: Window extent coordinates have to " + "be smaller or equal than the size of the input in that coord."); + } + } + } +} + +//--------------------------------------------------------------- +void MergerQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleOutput(workloadInfo, "MergerQueueDescriptor"); + + if (m_Inputs.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one input needs to be provided."); + } + if (m_Outputs.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one output needs to be provided."); + } + + if (workloadInfo.m_InputTensorInfos.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo input needs to be provided."); + } + if (workloadInfo.m_OutputTensorInfos.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo output needs to be provided."); + } + + if (workloadInfo.m_InputTensorInfos.size() != m_ViewOrigins.size()) + { + throw InvalidArgumentException( + "MergerQueueDescriptor: Number of split windows " + "has to match number of workloadInfo.m_InputTensorInfos. " + "Number of windows: " + + to_string(m_ViewOrigins.size()) + + ". Number of workloadInfo.m_InputTensorInfos: " + to_string(workloadInfo.m_InputTensorInfos.size())); + } + + //the dimensionality of all the windows has to match the dimensionality (not shape) of the output + std::size_t outputDims = workloadInfo.m_OutputTensorInfos[0].GetNumDimensions(); + for(unsigned int w = 0; w < m_ViewOrigins.size(); ++w ) + { + //check that the dimensionality of output is same as the split windows + ViewOrigin const& e = m_ViewOrigins[w]; + if (e.m_Origin.size() != outputDims) + { + throw InvalidArgumentException("MergerQueueDescriptor: Window origin have to " + "have the same dimensionality as the output tensor. 
" + "Window origin (index: " + + to_string(w) + ") has " + to_string(e.m_Origin.size()) + + " dimensions, the output " + "tensor has " + + to_string(outputDims) + " dimensions."); + } + //check that the merge windows are within the output tensor + for (unsigned int i = 0; i < e.m_Origin.size(); ++i) + { + if (e.m_Origin[i] + workloadInfo.m_InputTensorInfos[w].GetShape()[i] + > workloadInfo.m_OutputTensorInfos[0].GetShape()[i]) + { + throw InvalidArgumentException("MergerQueueDescriptor: Window extent coordinates have to " + "be smaller or equal than the size of the output in that coord."); + } + } + } +} + +//--------------------------------------------------------------- +void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "FullyConnectedQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "FullyConnectedQueueDescriptor"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "FullyConnectedQueueDescriptor", 2, "output"); + + if (!(workloadInfo.m_InputTensorInfos[0].GetNumDimensions() == 2 || + workloadInfo.m_InputTensorInfos[0].GetNumDimensions() == 4)) + { + throw InvalidArgumentException("FullyConnectedQueueDescriptor: Input tensor must have 2 or 4 dimensions."); + } + + if (m_Weight == nullptr) + { + throw InvalidArgumentException("FullyConnectedQueueDescriptor: Weight tensor descriptor is missing."); + } + + ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "FullyConnectedQueueDescriptor", 2, "weight"); + + if (m_Parameters.m_BiasEnabled) + { + if (m_Bias == nullptr) + { + throw InvalidArgumentException("FullyConnectedQueueDescriptor: Bias is enabled but " + "bias value tensor descriptor is missing."); + } + + // validate type and quantization values + ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), + workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "FullyConnectedQueueDescriptor"); + + ValidateTensorDataType(m_Bias->GetTensorInfo(), + GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), + "FullyConnectedQueueDescriptor", "bias"); + + ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "FullyConnectedQueueDescriptor", 1, "bias"); + } + + ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], "FullyConnectedQueueDescriptor", "input", "weights", "output"); +} + +//--------------------------------------------------------------- +void NormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "NormalizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "NormalizationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "NormalizationQueueDescriptor", + "input", + "output"); +} + +void AdditionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "AdditionQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "AdditionQueueDescriptor"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "AdditionQueueDescriptor", + "first input", + "second input"); + +} + +//--------------------------------------------------------------- +void MultiplicationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "MultiplicationQueueDescriptor"); + 
ValidateSingleOutput(workloadInfo, "MultiplicationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + "MultiplicationQueueDescriptor", + "first input", + "second input"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "MultiplicationQueueDescriptor", + "input", + "output"); +} + +void BatchNormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "BatchNormalizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "BatchNormalizationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "BatchNormalizationQueueDescriptor", + "input", + "output"); + ValidatePointer(m_Mean, "BatchNormalizationQueueDescriptor", "mean"); + ValidatePointer(m_Variance, "BatchNormalizationQueueDescriptor", "variance"); + ValidatePointer(m_Beta, "BatchNormalizationQueueDescriptor", "beta"); + ValidatePointer(m_Gamma, "BatchNormalizationQueueDescriptor", "gamma"); + + + ValidateTensorNumDimensions(m_Mean->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "mean"); + ValidateTensorNumDimensions(m_Variance->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "variance"); + ValidateTensorNumDimensions(m_Beta->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "beta"); + ValidateTensorNumDimensions(m_Gamma->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "gamma"); + + ValidateTensorShapesMatch( + m_Mean->GetTensorInfo(), m_Variance->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "variance"); + ValidateTensorShapesMatch( + m_Mean->GetTensorInfo(), m_Beta->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "beta"); + ValidateTensorShapesMatch( + m_Mean->GetTensorInfo(), m_Gamma->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "gamma"); +} + +void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "Convolution2dQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "Convolution2dQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "Convolution2dQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "Convolution2dQueueDescriptor", 4, "output"); + + ValidatePointer(m_Weight, "Convolution2dQueueDescriptor", "weight"); + ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "Convolution2dQueueDescriptor", 4, "weight"); + ValidateTensorDataType(m_Weight->GetTensorInfo(), workloadInfo.m_InputTensorInfos[0].GetDataType(), + "Convolution2dQueueDescriptor", "weight"); + if (m_Parameters.m_BiasEnabled) + { + ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "Convolution2dQueueDescriptor", 1, "bias"); + ValidateTensorDataType(m_Bias->GetTensorInfo(), + GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), + "Convolution2dQueueDescriptor", "bias"); + ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), + workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "Convolution2dQueueDescriptor"); + } + + ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], "Convolution2dQueueDescriptor", "input", "weights", "output"); +} + +void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, 
"DepthwiseConvolution2dQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "DepthwiseConvolution2dQueueDescriptor"); + + ValidateTensorNumDimensions( + workloadInfo.m_InputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions( + workloadInfo.m_OutputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", 4, "output"); + + ValidatePointer(m_Weight, "DepthwiseConvolution2dQueueDescriptor", "weight"); + ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 4, "weight"); + + //inputChannels * channelMultiplier should be equal to outputChannels + const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0]; + const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1]; + const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[1]; + if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels) + { + throw InvalidArgumentException( + boost::str(boost::format("DepthwiseConvolution2dQueueDescriptor: output_channels (provided %1%) should be " + "equal to input_channels (provided %2%) multiplied by channel_multiplier " + "(provided %3%).") + % numWeightOutputChannels % numWeightInputChannels % numWeightChannelMultiplier)); + } + + if (m_Parameters.m_BiasEnabled) + { + ValidatePointer(m_Bias, "DepthwiseConvolution2dQueueDescriptor", "bias"); + ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 1, "bias"); + ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), + workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor"); + + ValidateTensorDataType(m_Bias->GetTensorInfo(), + GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), + "DepthwiseConvolution2dQueueDescriptor", "bias"); + } + + ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", "input", "weights", "output"); +} + +void PermuteQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "PermuteQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "PermuteQueueDescriptor"); + + const PermutationVector& mapping = m_Parameters.m_DimMappings; + + const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& output = workloadInfo.m_OutputTensorInfos[0]; + + ValidateTensorNumDimensions(input, "PermuteQueueDescriptor", mapping.GetSize(), "input"); + ValidateTensorNumDimensions(output, "PermuteQueueDescriptor", mapping.GetSize(), "output"); + + for (unsigned int i = 0; i < mapping.GetSize(); ++i) + { + if (input.GetShape()[i] != output.GetShape()[mapping[i]]) + { + throw InvalidArgumentException("PermuteQueueDescriptor: src dimension " + to_string(i) + + " (=" + to_string(input.GetShape()[i]) + ") " + + "must match dst dimension " + to_string(mapping[i]) + + " (=" + to_string(output.GetShape()[mapping[i]]) + ")"); + } + } +} + +void Pooling2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "Pooling2dQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "Pooling2dQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "Pooling2dQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "Pooling2dQueueDescriptor", 4, "output"); +} + 
+void ResizeBilinearQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ResizeBilinearQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ResizeBilinearQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "ResizeBilinearQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "ResizeBilinearQueueDescriptor", 4, "output"); + + // Resize bilinear only changes width and height: batch and channel count must match + { + const unsigned int inputBatchSize = workloadInfo.m_InputTensorInfos[0].GetShape()[0]; + const unsigned int outputBatchSize = workloadInfo.m_OutputTensorInfos[0].GetShape()[0]; + if (inputBatchSize != outputBatchSize) + { + throw InvalidArgumentException( + boost::str(boost::format("ResizeBilinearQueueDescriptor: Input batch size (%1%) " + "does not match output batch size (%2%)") % inputBatchSize % outputBatchSize)); + } + } + + { + const unsigned int inputChannelCount = workloadInfo.m_InputTensorInfos[0].GetShape()[1]; + const unsigned int outputChannelCount = workloadInfo.m_OutputTensorInfos[0].GetShape()[1]; + if (inputChannelCount != outputChannelCount) + { + throw InvalidArgumentException( + boost::str(boost::format("ResizeBilinearQueueDescriptor: Input channel count (%1%) " + "does not match output channel count (%2%)") % inputChannelCount % outputChannelCount)); + } + } +} + +void FakeQuantizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "FakeQuantizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "FakeQuantizationQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "FakeQuantizationQueueDescriptor", 2, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "FakeQuantizationQueueDescriptor", 2, "output"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "FakeQuantizationQueueDescriptor", + "input", + "output"); + if (m_Parameters.m_Min > m_Parameters.m_Max) + { + throw InvalidArgumentException("FakeQuantizationQueueDescriptor: min cannot be greater than max"); + } + +} + +void L2NormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "L2NormalizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "L2NormalizationQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "L2NormalizationQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "L2NormalizationQueueDescriptor", 4, "output"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "L2NormalizationQueueDescriptor", + "input", + "output"); +} + +void ConstantQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateNoInputs(workloadInfo, "ConstantQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ConstantQueueDescriptor"); + + if (!m_LayerOutput) + { + throw InvalidArgumentException("ConstantQueueDescriptor: No const input specified"); + } + + ValidateTensorShapesMatch(m_LayerOutput->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], + "ConstantQueueDescriptor", + "constant", + "output"); +} + +void ReshapeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ReshapeQueueDescriptor"); + ValidateSingleOutput(workloadInfo, 
"ReshapeQueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos[0].GetNumElements() != workloadInfo.m_OutputTensorInfos[0].GetNumElements()) + { + throw InvalidArgumentException("ReshapeQueueDescriptor: Input tensor has " + + to_string(workloadInfo.m_InputTensorInfos[0].GetNumElements()) + " but output tensor has " + + to_string(workloadInfo.m_OutputTensorInfos[0].GetNumElements()) + " elements."); + } +} + +void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "FloorQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "FlootQueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos[0] != workloadInfo.m_OutputTensorInfos[0]) + { + throw InvalidArgumentException("FloorQueueDescriptor: Input and output tensor infos do not match."); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/WorkloadData.hpp b/src/armnn/backends/WorkloadData.hpp new file mode 100644 index 0000000000..7f8713582f --- /dev/null +++ b/src/armnn/backends/WorkloadData.hpp @@ -0,0 +1,252 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "WorkloadDataFwd.hpp" + +#include "armnn/Types.hpp" +#include "armnn/Tensor.hpp" +#include "armnn/Descriptors.hpp" +#include "armnn/Exceptions.hpp" +#include "InternalTypes.hpp" +#include "OutputHandler.hpp" +#include "CpuTensorHandleFwd.hpp" + +namespace armnn +{ + +//a helper function that returns the bias data type required for given input data type. +DataType GetBiasDataType(DataType inputDataType); + +struct WorkloadInfo; + +struct QueueDescriptor +{ + std::vector<ITensorHandle*> m_Inputs; + std::vector<ITensorHandle*> m_Outputs; + + void ValidateInputsOutputs(const std::string& descName, + unsigned int numExpectedIn, unsigned int numExpectedOut) const; + + +protected: + ~QueueDescriptor() = default; + QueueDescriptor() = default; + QueueDescriptor(QueueDescriptor const&) = default; + QueueDescriptor& operator=(QueueDescriptor const&) = default; +}; + +// Base class for queue descriptors which contain parameters +template <typename LayerDescriptor> +struct QueueDescriptorWithParameters : public QueueDescriptor +{ + LayerDescriptor m_Parameters; + +protected: + ~QueueDescriptorWithParameters() = default; + QueueDescriptorWithParameters() = default; + QueueDescriptorWithParameters(QueueDescriptorWithParameters const&) = default; + QueueDescriptorWithParameters& operator=(QueueDescriptorWithParameters const&) = default; +}; + +struct MemCopyQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +using InputQueueDescriptor = MemCopyQueueDescriptor; +using OutputQueueDescriptor = MemCopyQueueDescriptor; + +// Softmax layer workload data +struct SoftmaxQueueDescriptor : QueueDescriptorWithParameters<SoftmaxDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Splitter layer workload data +struct SplitterQueueDescriptor : QueueDescriptorWithParameters<ViewsDescriptor> +{ + struct ViewOrigin + { + ViewOrigin() {} + ViewOrigin(std::vector<unsigned int> const& origin) : m_Origin(origin) {} + + //view origin (size of the vector is the same as number of dimensions of the view) + std::vector<unsigned int> m_Origin; + }; + + //view defines a tensor that will be carved from the input tensor. + //view origins are stored here, the extents are defined by sizes of the output tensors. 
+ std::vector<ViewOrigin> m_ViewOrigins; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Merger layer workload data +struct MergerQueueDescriptor : QueueDescriptorWithParameters<OriginsDescriptor> +{ + struct ViewOrigin + { + ViewOrigin() {} + ViewOrigin(const std::vector<unsigned int>& origin) : m_Origin(origin) {} + + //view origin (size of the vector is the same as number of dimensions of the view) + std::vector<unsigned int> m_Origin; + }; + + //view defines a sub-area of the output tensor that will be filled with the corresponding input tensor. + //view origins are stored here, the extents are defined by sizes of the input tensors. + std::vector<ViewOrigin> m_ViewOrigins; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Activation layer workload data +struct ActivationQueueDescriptor : QueueDescriptorWithParameters<ActivationDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Fully connected layer workload data +struct FullyConnectedQueueDescriptor : QueueDescriptorWithParameters<FullyConnectedDescriptor> +{ + FullyConnectedQueueDescriptor() + : m_Weight(nullptr) + , m_Bias(nullptr) + { + } + + const ConstCpuTensorHandle* m_Weight; + const ConstCpuTensorHandle* m_Bias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Permute layer workload data +struct PermuteQueueDescriptor : QueueDescriptorWithParameters<PermuteDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Pooling 2D layer workload data +struct Pooling2dQueueDescriptor : QueueDescriptorWithParameters<Pooling2dDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Convolution 2D layer workload data +struct Convolution2dQueueDescriptor : QueueDescriptorWithParameters<Convolution2dDescriptor> +{ + Convolution2dQueueDescriptor() + : m_Weight(nullptr) + , m_Bias(nullptr) + { + } + + const ConstCpuTensorHandle* m_Weight; + const ConstCpuTensorHandle* m_Bias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Depthwise Convolution 2D layer workload data +struct DepthwiseConvolution2dQueueDescriptor : QueueDescriptorWithParameters<DepthwiseConvolution2dDescriptor> +{ + DepthwiseConvolution2dQueueDescriptor() + : m_Weight(nullptr) + , m_Bias(nullptr) + { + } + + const ConstCpuTensorHandle* m_Weight; + const ConstCpuTensorHandle* m_Bias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Normalization layer workload data +struct NormalizationQueueDescriptor : QueueDescriptorWithParameters<NormalizationDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Add layer workload data +struct AdditionQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Multiplication layer workload data +struct MultiplicationQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Batch norm layer workload data +struct BatchNormalizationQueueDescriptor : QueueDescriptorWithParameters<BatchNormalizationDescriptor> +{ + BatchNormalizationQueueDescriptor() + : m_Mean(nullptr) + , m_Variance(nullptr) + , m_Beta(nullptr) + , m_Gamma(nullptr) + { + } + + const ConstCpuTensorHandle* m_Mean; + const ConstCpuTensorHandle* m_Variance; + const ConstCpuTensorHandle* m_Beta; + const ConstCpuTensorHandle* m_Gamma; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ResizeBilinearQueueDescriptor : 
QueueDescriptorWithParameters<ResizeBilinearDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct FakeQuantizationQueueDescriptor : QueueDescriptorWithParameters<FakeQuantizationDescriptor> +{ + FakeQuantizationQueueDescriptor() + : m_Min(nullptr) + , m_Max(nullptr) + { + } + + const ConstCpuTensorHandle* m_Min; + const ConstCpuTensorHandle* m_Max; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct L2NormalizationQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ConstantQueueDescriptor : QueueDescriptor +{ + ConstantQueueDescriptor() + : m_LayerOutput(nullptr) + { + } + + const ConstCpuTensorHandle* m_LayerOutput; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ReshapeQueueDescriptor : QueueDescriptorWithParameters<ReshapeDescriptor> +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct FloorQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +} //namespace armnn diff --git a/src/armnn/backends/WorkloadDataCollector.hpp b/src/armnn/backends/WorkloadDataCollector.hpp new file mode 100644 index 0000000000..4dfd0ea5f4 --- /dev/null +++ b/src/armnn/backends/WorkloadDataCollector.hpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/Tensor.hpp> + +#include <vector> + +namespace armnn +{ +class ITensorHandle; + +class WorkloadDataCollector +{ +public: + WorkloadDataCollector(std::vector<ITensorHandle*>& handles, std::vector<TensorInfo>& infos) + : m_Handles(handles) + , m_Infos(infos) + { + } + + void Push(ITensorHandle* handle, const TensorInfo& info) + { + m_Handles.push_back(handle); + m_Infos.push_back(info); + } + +private: + std::vector<ITensorHandle*>& m_Handles; + std::vector<TensorInfo>& m_Infos; +}; + + +} //namespace armnn diff --git a/src/armnn/backends/WorkloadDataFwd.hpp b/src/armnn/backends/WorkloadDataFwd.hpp new file mode 100644 index 0000000000..1b466b69ca --- /dev/null +++ b/src/armnn/backends/WorkloadDataFwd.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +namespace armnn +{ + +struct QueueDescriptor; +template <typename LayerDescriptor> +struct QueueDescriptorWithParameters; +struct SoftmaxQueueDescriptor; +struct SplitterQueueDescriptor; +struct MergerQueueDescriptor; +struct ActivationQueueDescriptor; +struct FullyConnectedQueueDescriptor; +struct PermuteQueueDescriptor; +struct Pooling2dQueueDescriptor; +struct Convolution2dQueueDescriptor; +struct NormalizationQueueDescriptor; +struct MultiplicationQueueDescriptor; +struct BatchNormalizationQueueDescriptor; +struct FakeQuantizationQueueDescriptor; +struct ReshapeQueueDescriptor; + +} // namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/WorkloadFactory.cpp b/src/armnn/backends/WorkloadFactory.cpp new file mode 100644 index 0000000000..32634a6d0f --- /dev/null +++ b/src/armnn/backends/WorkloadFactory.cpp @@ -0,0 +1,214 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "WorkloadFactory.hpp" +#include "RefWorkloadFactory.hpp" +#include "NeonWorkloadFactory.hpp" +#include "ClWorkloadFactory.hpp" + +#include "armnn/Types.hpp" +#include "armnn/LayerSupport.hpp" +#include "Layer.hpp" +#include "Layers.hpp" +#include "CpuTensorHandle.hpp" + +#include <boost/cast.hpp> +#include <cstring> +#include <boost/iterator/transform_iterator.hpp> + +namespace armnn +{ + +bool IWorkloadFactory::IsLayerSupported(Compute compute, const Layer& layer, DataType dataType, + std::string& outReasonIfUnsupported) +{ + constexpr size_t reasonCapacity = 1024; + char reason[reasonCapacity]; + bool result; + switch(layer.GetType()) + { + case LayerType::Activation: + { + auto cLayer = boost::polymorphic_downcast<const ActivationLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsActivationSupported(compute, input, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Addition: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsAdditionSupported(compute, input0, input1, output, reason, reasonCapacity); + break; + } + case LayerType::BatchNormalization: + { + auto cLayer = boost::polymorphic_downcast<const BatchNormalizationLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsBatchNormalizationSupported(compute, input, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Constant: + { + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsConstantSupported(compute, output, reason, reasonCapacity); + break; + } + case LayerType::Convolution2d: + { + auto cLayer = boost::polymorphic_downcast<const Convolution2dLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsConvolution2dSupported(compute, input, cLayer->GetParameters(), + cLayer->m_Weight->GetTensorInfo(), reason, reasonCapacity); + break; + } + case LayerType::MemCopy: + { + // MemCopy supported for CpuRef, CpuAcc and GpuAcc backends + // (also treat Undefined as CpuRef to avoid breaking lots of Unit tests) + result = compute == Compute::CpuRef || compute == Compute::Undefined + || compute == Compute::CpuAcc || compute == Compute::GpuAcc; + strcpy(reason, "Unsupported backend type"); + break; + } + case LayerType::DepthwiseConvolution2d: + { + auto cLayer = boost::polymorphic_downcast<const DepthwiseConvolution2dLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsDepthwiseConvolutionSupported(compute, input, cLayer->GetParameters(), + cLayer->m_Weight->GetTensorInfo(), reason, reasonCapacity); + break; + } + case LayerType::FakeQuantization: + { + auto cLayer = boost::polymorphic_downcast<const FakeQuantizationLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = 
IsFakeQuantizationSupported(compute, input, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Floor: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsFloorSupported(compute, input, output, reason, reasonCapacity); + break; + } + case LayerType::FullyConnected: + { + auto cLayer = boost::polymorphic_downcast<const FullyConnectedLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsFullyConnectedSupported(compute, input, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Input: + { + const TensorInfo& input = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsInputSupported(compute, input, reason, reasonCapacity); + break; + } + case LayerType::L2Normalization: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsL2NormalizationSupported(compute, input, reason, reasonCapacity); + break; + } + case LayerType::Merger: + { + auto cLayer = boost::polymorphic_downcast<const MergerLayer*>(&layer); + + // Get vector of all inputs + auto getTensorInfo = [](const InputSlot& slot) + { + return &slot.GetConnectedOutputSlot()->GetTensorInfo(); + }; + auto begin = boost::make_transform_iterator(layer.GetInputSlots().begin(), getTensorInfo); + auto end = boost::make_transform_iterator(layer.GetInputSlots().end(), getTensorInfo); + + std::vector<const TensorInfo*> inputs(begin, end); + + result = IsMergerSupported(compute, inputs, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Multiplication: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + result = IsMultiplicationSupported(compute, input0, input1, reason, reasonCapacity); + break; + } + case LayerType::Normalization: + { + auto cLayer = boost::polymorphic_downcast<const NormalizationLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsNormalizationSupported(compute, input, output, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Output: + { + const TensorInfo& output = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsOutputSupported(compute, output, reason, reasonCapacity); + break; + } + case LayerType::Permute: + { + auto cLayer = boost::polymorphic_downcast<const PermuteLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsPermuteSupported(compute, input, output, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Pooling2d: + { + auto cLayer = boost::polymorphic_downcast<const Pooling2dLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsPooling2dSupported(compute, input, output, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Reshape: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsReshapeSupported(compute, input, reason, reasonCapacity); + break; + } + case 
LayerType::ResizeBilinear: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsResizeBilinearSupported(compute, input, reason, reasonCapacity); + break; + } + case LayerType::Softmax: + { + auto cLayer = boost::polymorphic_downcast<const SoftmaxLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsSoftmaxSupported(compute, input, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Splitter: + { + auto cLayer = boost::polymorphic_downcast<const SplitterLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsSplitterSupported(compute, input, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "WorkloadFactory did not recognise type of layer."); + strcpy(reason, "Unrecognised layer type"); + result = false; + break; + } + } + outReasonIfUnsupported = reason; + return result; +} + +bool IWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported) +{ + return IsLayerSupported(layer.GetComputeDevice(), layer, dataType, outReasonIfUnsupported); +} + +}
\ No newline at end of file diff --git a/src/armnn/backends/WorkloadFactory.hpp b/src/armnn/backends/WorkloadFactory.hpp new file mode 100644 index 0000000000..d3f5bfb40f --- /dev/null +++ b/src/armnn/backends/WorkloadFactory.hpp @@ -0,0 +1,105 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Workload.hpp" +#include <memory> +#include "armnn/TensorFwd.hpp" +#include "OutputHandler.hpp" + +namespace armnn +{ + +class Layer; + +// Workload factory interface for compute backends +class IWorkloadFactory +{ +public: + virtual ~IWorkloadFactory() { } + + virtual Compute GetCompute() const = 0; + + static bool IsLayerSupported(Compute compute, const Layer& layer, DataType dataType, + std::string& outReasonIfUnsupported); + static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const = 0; + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin + ) const = 0; + + virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0; + + virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual 
std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/backends/WorkloadInfo.hpp b/src/armnn/backends/WorkloadInfo.hpp new file mode 100644 index 0000000000..b0a0d2fe0f --- /dev/null +++ b/src/armnn/backends/WorkloadInfo.hpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +namespace armnn +{ + +/// Contains information about inputs and outputs to a layer. +/// This is needed at construction of workloads, but are not stored. +struct WorkloadInfo +{ + std::vector<TensorInfo> m_InputTensorInfos; + std::vector<TensorInfo> m_OutputTensorInfos; +}; + +} //namespace armnn diff --git a/src/armnn/backends/test/ActivationFixture.hpp b/src/armnn/backends/test/ActivationFixture.hpp new file mode 100644 index 0000000000..a67a110354 --- /dev/null +++ b/src/armnn/backends/test/ActivationFixture.hpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +struct ActivationFixture +{ + ActivationFixture() + { + auto boostArrayExtents = boost::extents + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(batchSize)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(channels)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(height)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(width)]; + output.resize(boostArrayExtents); + outputExpected.resize(boostArrayExtents); + input.resize(boostArrayExtents); + + unsigned int inputShape[] = { batchSize, channels, height, width }; + unsigned int outputShape[] = { batchSize, channels, height, width }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + input = MakeRandomTensor<float, 4>(inputTensorInfo, 21453); + } + + unsigned int width = 17; + unsigned int height = 29; + unsigned int channels = 2; + unsigned int batchSize = 5; + + boost::multi_array<float, 4> output; + boost::multi_array<float, 4> outputExpected; + boost::multi_array<float, 4> input; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + // parameters used by some of the activation functions + float a = 0.234f; + float b = -12.345f; +}; + + +struct PositiveActivationFixture : public ActivationFixture +{ + PositiveActivationFixture() + { + input = MakeRandomTensor<float, 4>(inputTensorInfo, 2342423, 0.0f, 1.0f); + } +};
\ No newline at end of file diff --git a/src/armnn/backends/test/ActivationTestImpl.hpp b/src/armnn/backends/test/ActivationTestImpl.hpp new file mode 100644 index 0000000000..255a00ef0b --- /dev/null +++ b/src/armnn/backends/test/ActivationTestImpl.hpp @@ -0,0 +1,559 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" +#include "ActivationFixture.hpp" + +#include <algorithm> + +template<typename T> +LayerTestResult<T, 4> BoundedReLuTestCommon(armnn::IWorkloadFactory& workloadFactory, + float upperBound, float lowerBound, + float inputScale, int32_t inputOffset, float outputScale, int32_t outputOffset, + const std::vector<T>& inputData, const std::vector<T>& outputExpectedData, + unsigned int inputWidth, unsigned int inputHeight, + unsigned int inputChannels, unsigned int inputBatchSize) +{ + unsigned int outputWidth = inputWidth; + unsigned int outputHeight = inputHeight; + unsigned int outputChannels = inputChannels; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(inputScale); + inputTensorInfo.SetQuantizationOffset(inputOffset); + + outputTensorInfo.SetQuantizationScale(outputScale); + outputTensorInfo.SetQuantizationOffset(outputOffset); + } + + LayerTestResult<T, 4> result(inputTensorInfo); + + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; + descriptor.m_Parameters.m_A = upperBound; + descriptor.m_Parameters.m_B = lowerBound; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); + + return result; +} + +LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 4u; + unsigned int inputHeight = 5u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<float> input = std::vector<float>{ + -2.0f, 0.1f, 0.5f, 1.25f, + 0.786f, 0.9875f, -1.5f, 0.384f, + 1.0001f, 3.5f, 7.5f, 0.896f, + 2.126f, 2.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.1f, + }; + 
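+    // With an upper bound of 1.0f and a lower bound of -1.0f (passed to BoundedReLuTestCommon below),
+    // every input value is clamped to the range [-1.0f, 1.0f].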
+ // Calculated manually + std::vector<float> output = std::vector<float>{ + -1.0f, 0.1f, 0.5f, 1.0f, + 0.786f, 0.9875f, -1.0f, 0.384f, + 1.0f, 1.0f, 1.0f, 0.896f, + 1.0f, 1.0f, 0.3f, 0.15f, + 0.999f, 1.0f, 0.89f, 1.0f, + }; + + return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, 1.0f, 0, 1.0f, 0, input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 4u; + unsigned int inputHeight = 5u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<float> input = std::vector<float>{ + -1.0f, 0.1f, 0.5f, 6.25f, + 0.786f, 5.9875f, -0.5f, 0.384f, + 6.0001f, 3.5f, 7.5f, 0.896f, + 2.126f, 12.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.1f, + }; + + // Calculated manually + std::vector<float> output = std::vector<float>{ + 0.0f, 0.1f, 0.5f, 6.0f, + 0.786f, 5.9875f, 0.0f, 0.384f, + 6.0f, 3.5f, 6.0f, 0.896f, + 2.126f, 6.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.0f, + }; + + return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, 1.0f, 0, 1.0f, 0, input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 3u; + unsigned int inputHeight = 2u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<uint8_t> input = std::vector<uint8_t>{ + 51, 124, 28, + 251, 8, 92 + }; + + // Calculated manually + std::vector<uint8_t> output = std::vector<uint8_t>{ + 0, 122, 0, + 255, 0, 58 + }; + + float inputScale = 12.0f / 255.0f; + int32_t inputOffset = 63; + float outputScale = 6.0f / 255.0f; + int32_t outputOffset = 0; + + return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, + inputScale, inputOffset, outputScale, outputOffset, + input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 3u; + unsigned int inputHeight = 2u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<uint8_t> input = std::vector<uint8_t>{ + 51, 230, 28, + 251, 8, 92 + }; + + // Calculated manually + std::vector<uint8_t> output = std::vector<uint8_t>{ + 51, 192, 32, + 192, 32, 92 + }; + + int32_t inputOffset = 112; + float inputScale = 0.0125f; + + return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, + inputScale, inputOffset, inputScale, inputOffset, // input/output scale & offset same + input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +namespace +{ + +struct BoundedReLuRandomInputTestTraits +{ + constexpr static unsigned int inputHeight = 31u; + constexpr static unsigned int inputWidth = 19u; + constexpr static unsigned int inputChannels = 4u; + constexpr static unsigned int inputBatchSize = 2; + + constexpr static unsigned int outputHeight = inputHeight; + constexpr static unsigned int outputWidth = inputWidth; + constexpr static unsigned int outputChannels = inputChannels; + constexpr static unsigned int outputBatchSize = inputBatchSize; + + static armnn::TensorInfo GetInputTensorInfo() + { + return armnn::TensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + } + + static armnn::TensorInfo GetOutputTensorInfo() + { + return armnn::TensorInfo({ outputBatchSize, outputChannels, outputHeight, 
outputWidth }, + armnn::DataType::Float32); + } +}; + +boost::multi_array<float, 4> BoundedReLuRandomInputTest(armnn::IWorkloadFactory& workloadFactory, + float lowerBound, + float upperBound, + const armnn::ActivationDescriptor& activationDescriptor) +{ + const armnn::TensorInfo inputTensorInfo = BoundedReLuRandomInputTestTraits::GetInputTensorInfo(); + const armnn::TensorInfo outputTensorInfo = BoundedReLuRandomInputTestTraits::GetOutputTensorInfo(); + + boost::multi_array<float, 4> output(GetTensorShapeAsArray<4>(outputTensorInfo)); + + // min/max random values passed to MakeRandomTensor are purposely outside of the ReLu range [lowerBound, upperBound] + auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 4605828, lowerBound - 5.0f, upperBound * 2.0f); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + descriptor.m_Parameters = activationDescriptor; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&output[0][0][0][0], outputHandle.get()); + + return output; +} + +} // namespace + +LayerTestResult<float, 4> CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& otherWorkloadFactory, + float upperBound, + float lowerBound) +{ + LayerTestResult<float, 4> result(BoundedReLuRandomInputTestTraits::GetOutputTensorInfo()); + + armnn::ActivationDescriptor activationDescriptor; + activationDescriptor.m_Function = armnn::ActivationFunction::BoundedReLu; + activationDescriptor.m_A = upperBound; + activationDescriptor.m_B = lowerBound; + + result.output = BoundedReLuRandomInputTest(workloadFactory, 0.0f, upperBound, activationDescriptor); + result.outputExpected = BoundedReLuRandomInputTest(otherWorkloadFactory, 0.0f, upperBound, activationDescriptor); + + return result; +} + +template<typename T> +LayerTestResult<T,4> ConstantLinearActivationTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputHeight = 20; + unsigned int inputWidth = 17; + unsigned int inputChannels = 3; + unsigned int batchSize = 5; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, inputChannels, inputHeight, inputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Do linear activation that should leave tensor unchanged + armnn::ActivationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_A = 1.0f; + data.m_Parameters.m_B = 0.0f; + data.m_Parameters.m_Function = armnn::ActivationFunction::Linear; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 7123561); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + // Ensure output equals input + ret.outputExpected = input; + + return ret; +} + +LayerTestResult<float, 4> ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantLinearActivationTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantLinearActivationTestCommon<uint8_t>(workloadFactory, 4.0f, 3); +} + +template<typename T> +LayerTestResult<T, 4> SimpleActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::ActivationFunction activationFunction, + float activationParameterA, + float activationParameterB, + float qScale, + int32_t qOffset, + const std::vector<float>& inputData, + const std::vector<float>& outputExpectedData) +{ + constexpr static unsigned int inputWidth = 16u; + constexpr static unsigned int inputHeight = 1u; + constexpr static unsigned int inputChannels = 1u; + constexpr static unsigned int inputBatchSize = 1u; + + constexpr static unsigned int outputWidth = inputWidth; + constexpr static unsigned int outputHeight = inputHeight; + constexpr static unsigned int outputChannels = inputChannels; + constexpr static unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 4> result(inputTensorInfo); + + auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = activationFunction; + descriptor.m_Parameters.m_A = activationParameterA; + descriptor.m_Parameters.m_B = activationParameterB; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + // Calculated manually + result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, outputExpectedData)); + + return result; +} + +template<typename T> +LayerTestResult<T, 4> SimpleSigmoidTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) +{ + std::vector<float> inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input + auto f = [](float value) + { + return 1.0f / (1.0f + std::exp(-value)); + }; + std::vector<float> outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + return SimpleActivationTest<T>(workloadFactory, + armnn::ActivationFunction::Sigmoid, + 0.f, + 0.f, + qScale, + qOffset, + inputData, + outputExpectedData); +} + +LayerTestResult<float, 4> SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleSigmoidTestCommon<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleSigmoidTestCommon<uint8_t>(workloadFactory, 0.1f, 50); +} + +template<typename T> +LayerTestResult<T,4> CompareActivationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize = 5, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int width = 17; + unsigned int height = 29; + unsigned int channels = 2; + + float a = 0.234f; + float b = -12.345f; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + float minVal = -10.f; + if (f == armnn::ActivationFunction::Sqrt) + { + minVal = 0.f; + } + + boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 21453, minVal, 10.f); + + + LayerTestResult<T,4> ret(outputTensorInfo); + auto boostArrayExtents = boost::extents + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(batchSize)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(channels)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(height)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(width)]; + ret.output.resize(boostArrayExtents); + ret.outputExpected.resize(boostArrayExtents); + + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ActivationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_A = a; + data.m_Parameters.m_B = b; + data.m_Parameters.m_Function = f; + + armnn::ActivationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(data, info); + BOOST_ASSERT(workload != nullptr); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateActivation(refData, refInfo); + BOOST_ASSERT(workloadRef != nullptr); + + inputHandle->Allocate(); + outputHandle->Allocate(); + inputHandleRef->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +LayerTestResult<float,4> CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize) +{ + return CompareActivationTestImpl<float>(workloadFactory, refWorkloadFactory, f, batchSize); +} + +LayerTestResult<uint8_t,4> CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f) +{ + return CompareActivationTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, f, 5, 0.1f, 50); +} diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp new file mode 100644 index 0000000000..5933cebc80 --- /dev/null +++ b/src/armnn/backends/test/ArmComputeCl.cpp @@ -0,0 +1,269 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClWorkloadFactory.hpp" +#include "backends/ClWorkloadUtils.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/ClLayerSupport.hpp" +#include "ActivationFixture.hpp" + +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <arm_compute/runtime/CL/CLScheduler.h> +#include <string> +#include <iostream> + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_ArmComputeCl) +using FactoryType = armnn::ClWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Activation +ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) + +ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) + +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +// Splitter +BOOST_AUTO_TEST_CASE(SimpleSplitter) +{ + armnn::ClWorkloadFactory workloadFactory; + auto testResult = SplitterTest(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +BOOST_AUTO_TEST_CASE(SimpleSplitterUint8) +{ + armnn::ClWorkloadFactory workloadFactory; + auto testResult = SplitterUint8Test(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, 
CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) + +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest) 
+ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest) + +// Resize Bilinear +ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) + +// Concat +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) + +// ============================================================================ +// COMPARE tests + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest<float>) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest<uint8_t>) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Within, + armnn::NormalizationAlgorithmMethod::LocalBrightness) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, 
CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Across, + armnn::NormalizationAlgorithmMethod::LocalBrightness) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8, CompareSoftmaxUint8Test, 1.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRefUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Average) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithRef, CompareMultiplicationTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) + +// ============================================================================ +// FIXTURE tests + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::TanH, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Linear, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, + CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::SoftReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Abs, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Square, 5u) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp new file mode 100644 index 0000000000..dd8a668940 --- /dev/null +++ b/src/armnn/backends/test/ArmComputeNeon.cpp @@ -0,0 +1,360 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/test/TensorCopyUtils.hpp" +#include "ActivationFixture.hpp" + +#include "WorkloadTestUtils.hpp" + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_ArmComputeNeon) +using FactoryType = armnn::NeonWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +namespace +{ + +armnn::Convolution2dDescriptor MakeConv2dDesc(uint32_t strideX, uint32_t strideY, + uint32_t padLeft = 0, uint32_t padRight = 0, uint32_t padTop = 0, uint32_t padBottom = 0) +{ + armnn::Convolution2dDescriptor result; + result.m_StrideX = strideX; + result.m_StrideY = strideY; + result.m_PadLeft = padLeft; + result.m_PadRight = padRight; + result.m_PadTop = padTop; + result.m_PadBottom = padBottom; + result.m_BiasEnabled = true; + return result; +} + +} + +BOOST_AUTO_TEST_CASE(Conv2dUtils) +{ + // the only preferred Neon convolution is 1x1 with padding=0 and stride size {1,2,3} + armnn::TensorShape shape1x1({ 1,1,1,1 }); + armnn::TensorInfo info1x1(shape1x1, armnn::DataType::Float32); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 3))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 3))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 3))); + + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 1))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 5))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 6))); + + // non zero padding is not preferred for direct convolution + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 0))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 0, 1))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 1))); + + // 2x2 filter not preferred for direct convolution + armnn::TensorShape shape2x2({ 1,1,2,2 }); + armnn::TensorInfo info2x2(shape2x2, armnn::DataType::Float32); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info2x2, MakeConv2dDesc(1, 1))); +} + 
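+// The assertions above can be summarised by the following predicate. This is only an illustrative sketch of the
+// behaviour the test expects; ExpectedDirectConvolutionPreference is a hypothetical helper, not the real
+// armnn::IsNeonDirectConvolutionPreferred implementation, which may apply further criteria.
+bool ExpectedDirectConvolutionPreference(const armnn::TensorInfo& weightInfo,
+                                         const armnn::Convolution2dDescriptor& desc)
+{
+    const armnn::TensorShape& shape = weightInfo.GetShape();
+    const bool kernelIs1x1   = shape[2] == 1 && shape[3] == 1;
+    const bool noPadding     = desc.m_PadLeft == 0 && desc.m_PadRight == 0 &&
+                               desc.m_PadTop == 0 && desc.m_PadBottom == 0;
+    const bool strideInRange = desc.m_StrideX >= 1 && desc.m_StrideX <= 3 &&
+                               desc.m_StrideY >= 1 && desc.m_StrideY <= 3;
+    return kernelIs1x1 && noPadding && strideInRange;
+}
+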
+// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +namespace +{ + +armnn::DepthwiseConvolution2dDescriptor MakeDepthwiseConv2dDesc(uint32_t strideX, uint32_t strideY, + uint32_t depthMultiplier = 1, uint32_t padLeft = 0, uint32_t padRight = 0, + uint32_t padTop = 0, uint32_t padBottom = 0) +{ + armnn::DepthwiseConvolution2dDescriptor desc; + desc.m_PadLeft = padLeft; + desc.m_PadRight = padRight; + desc.m_PadTop = padTop; + desc.m_PadBottom = padBottom; + desc.m_StrideX = strideX; + desc.m_StrideY = strideY; + desc.m_BiasEnabled = true; + return desc; +} + +} + +BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils) +{ + armnn::TensorInfo inputInfo({ 1, 1, 10, 10 }, armnn::DataType::Float32); + armnn::TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, armnn::DataType::Float32); + + // Strides supported: 1,2,3 + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 2), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 3), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 1), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 2), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 3), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 1), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 2), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 3), weightsInfo3x3)); + + // Unsupported stride + BOOST_TEST(!armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(4, 1), weightsInfo3x3)); + + // Supported weights shape 1x1 + armnn::TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, armnn::DataType::Float32); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo1x1)); + + // Supported shape 2x2 + armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo2x2)); +} + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, 
SimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) + +// Ignore padding values for pooling but count padding fields into the divisor +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) + +// Activation +ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) + +ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) + +// Splitter +BOOST_AUTO_TEST_CASE(SimpleSplitter) +{ + armnn::NeonWorkloadFactory workloadFactory; + auto testResult = SplitterTest(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +BOOST_AUTO_TEST_CASE(SimpleSplitterUint8) +{ + armnn::NeonWorkloadFactory workloadFactory; + auto testResult = SplitterUint8Test(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, 
CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) + +// Concatenation +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// L2 Normalization +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest); + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, 
SimplePermuteUint8Test) +// ============================================================================ +// COMPARE tests + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest<float>) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest<uint8_t>) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Within, + armnn::NormalizationAlgorithmMethod::LocalBrightness) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Across, + armnn::NormalizationAlgorithmMethod::LocalBrightness) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Max) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReference, ComparePooling2dTest, + armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(UNSUPPORTED_CompareL2Pooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::L2) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta1WithReference, CompareSoftmaxUint8Test, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta2WithReference, CompareSoftmaxUint8Test, 2.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithReference, CompareMultiplicationTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) + +// ============================================================================ +// FIXTURE tests + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::TanH, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Linear, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, + CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) + 
+ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::SoftReLu, 1u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Abs, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Square, 5u) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/BatchNormTestImpl.hpp b/src/armnn/backends/test/BatchNormTestImpl.hpp new file mode 100644 index 0000000000..861ef6b053 --- /dev/null +++ b/src/armnn/backends/test/BatchNormTestImpl.hpp @@ -0,0 +1,112 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "backends/test/QuantizeHelper.hpp" + + +template<typename T> +LayerTestResult<T,4> BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 2; + const unsigned int num = 1; + + armnn::TensorInfo inputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>()); + armnn::TensorInfo tensorInfo({channels}, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        tensorInfo.SetQuantizationScale(qScale);
+        tensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset,
+        {
+            1.f, 4.f,
+            4.f, 2.f,
+            1.f, 6.f,
+
+            1.f, 1.f,
+            4.f, 1.f,
+            -2.f, 4.f
+        }));
+    // These values are per-channel of the input.
+    auto mean = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
+    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4, 9}));
+    auto beta = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, 2}));
+    auto gamma = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2, 1}));
+    LayerTestResult<T,4> ret(outputTensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::BatchNormalizationQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Mean = &meanTensor;
+    data.m_Variance = &varianceTensor;
+    data.m_Beta = &betaTensor;
+    data.m_Gamma = &gammaTensor;
+    data.m_Parameters.m_Eps = 0.0f;
+
+    // For each channel:
+    // subtract the mean, divide by the standard deviation (with an epsilon to avoid division by zero),
+    // then multiply by gamma and add beta.
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset,
+        {
+            1.f, 4.f,
+            4.f, 2.f,
+            1.f, 6.f,
+
+            3.f, 3.f,
+            4.f, 3.f,
+            2.f, 4.f
+        }));
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
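+
+// Sanity-check sketch for the expected values above (assuming plain per-channel batch normalization as described
+// in the comment; this helper is illustrative and not part of the ArmNN API - it needs <cmath> for std::sqrt):
+//     y = gamma * (x - mean) / sqrt(variance + eps) + beta
+// With the constants used here, channel 0 reduces to 2 * (x - 3) / 2 + 3 = x (identity) and channel 1 to
+// (x + 2) / 3 + 2, e.g. 1 -> 3, 4 -> 4, -2 -> 2, which reproduces the outputExpected values.
+inline float ReferenceBatchNorm(float x, float mean, float variance, float beta, float gamma, float eps = 0.0f)
+{
+    return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
+}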
\ No newline at end of file diff --git a/src/armnn/backends/test/Conv2dTestImpl.hpp b/src/armnn/backends/test/Conv2dTestImpl.hpp new file mode 100644 index 0000000000..0c0511b234 --- /dev/null +++ b/src/armnn/backends/test/Conv2dTestImpl.hpp @@ -0,0 +1,802 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +// Mapping from input type to bias type for fully connected layers. +// float => float, uint8_t => int32_t +template<typename T> +struct FullyConnectedBiasTypeForInputType; + +template<> +struct FullyConnectedBiasTypeForInputType<float> +{ + using Type = float; +}; + +template<> +struct FullyConnectedBiasTypeForInputType<uint8_t> +{ + using Type = int32_t; +}; + +// Modifies a std::vector in-place using a specified bias +template<typename T, typename B> +void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset, + const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h) +{ + BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()), + "Invalid type and parameter combination."); + BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()), + "Invalid type and parameter combination."); + + // Note we need to dequantize and re-quantize the image value and the bias + for (uint32_t i = 0; i < bias.size(); ++i) + { + float dBias = SelectiveDequantize(bias[i], bScale, bOffset); + for (uint32_t y = 0; y < h; ++y) + { + for (uint32_t x = 0; x < w; ++x) + { + uint32_t offset = (i * h + y) * w + x; + BOOST_ASSERT(offset < v.size()); + T& outRef = v[offset]; + float dOutput = SelectiveDequantize(outRef, vScale, vOffset); + outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset); + } + } + } +} + + + +template<typename T, typename B> +LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& kernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& outputExpected, + float qScale, + int32_t qOffset, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0) +{ + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); + + bool biasEnabled = bias.size() > 0; + + // This function currently assumes 1 batch of input/output (and duplicates this into 2 
batches) + BOOST_ASSERT(inputNum == 1); + BOOST_ASSERT(outputNum == 1); + + // If a bias is used, its size must equal the number of output channels + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + + // Note these tensors will use two (identical) batches + armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({outputChannels, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + // Construct input data - Two batches of the same input image + std::vector<T> inputImage; + inputImage.assign(input.data(), input.data() + 1*inputChannels*inputHeight*inputWidth); + std::vector<T> inputData; + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::vector<T> outputImage; + outputImage.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); + + // Apply bias to output image if enabled + if(biasEnabled) + { + std::vector<T> biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + // Construct expected output data - two identical images + std::vector<T> outputData; + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + // todo: nontrivial padding and strides + uint32_t strideX = 1; + uint32_t strideY = 1; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + if(biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled - can be a source of bugs + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; 
+ data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<typename T, typename B> +LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + unsigned int inputHeight = 3; + unsigned int inputWidth = 3; + unsigned int inputChannels = 2; + unsigned int inputNum = 1; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int kernelChannels = inputChannels; + + unsigned int outputHeight = 1; + unsigned int outputWidth = 1; + unsigned int outputChannels = kernelChannels; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo({ inputNum, inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ outputNum, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({ 1, outputChannels, kernelHeight, kernelWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + }))); + + std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2})); + auto bias = MakeTensor<B, 1>(biasDesc, biasV); + + auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + }))); + + // manually calculated + std::vector<T> outputImage( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + {0.f, 0.f}) + ); + + // Optionally apply bias to output image + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = 
workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 0; + data.m_Parameters.m_PadBottom = 0; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<typename T, typename B> +LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + unsigned int depthMultiplier = 2; + + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 2; + unsigned int inputBatchSize = 1; + + unsigned int kernelHeight = 5; + unsigned int kernelWidth = 3; + + unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2; + unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2; + unsigned int outputChannels = inputChannels * depthMultiplier; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({inputBatchSize, inputChannels, inputHeight, inputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({outputBatchSize, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. 
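+    // (Shape note: together with strideX = 2, strideY = 1 and padTop = padBottom = 1 set further down, the output
+    // dimensions above follow the usual convolution arithmetic: outputHeight = (8 + 2 - 5) / 1 + 1 = 6 and
+    // outputWidth = (16 + 0 - 3) / 2 + 1 = 7.)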
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2, 1, -1})); + auto bias = MakeTensor<B, 1>(biasDesc, biasV); + + auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + 0, 0, 0, + 0, -1, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 0, 0, 0, + 0, 0, 0, + 0, 1, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // manually calculated + std::vector<T> outputImage = std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, + 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f + })); + + // Optionally apply bias to output image + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled + data.m_Parameters.m_StrideX = 2; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 1; + data.m_Parameters.m_PadBottom = 1; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + + + +template<typename T> +LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + using B = typename FullyConnectedBiasTypeForInputType<T>::Type; + + // until we have a specialist 1D convolution layer, we can fake one using + // 2D convolution with the final dimension set to 1. + // I don't anticipate this being particularly slow, given that convolution is implemented + // as a matrix multiplication, at which point dimension doesn't matter. + + unsigned int batchSize = 1; + unsigned int inputChannels = 2; + unsigned int outputChannels = 3; + unsigned int inputSize = 5; // the 1D size (could view as 'width' or 'height') + unsigned int kernelSize = 3; + unsigned int padSize = 2; + unsigned int stride = 1; + unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride + + armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo biasInfo({outputChannels}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. 
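+    // For quantized types the bias is conventionally quantized with scale = input scale * weight scale
+    // and a zero offset, which is what the block below sets up.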
+ if(armnn::IsQuantizedType<T>()) + { + inputInfo.SetQuantizationScale(qScale); + inputInfo.SetQuantizationOffset(qOffset); + outputInfo.SetQuantizationScale(qScale); + outputInfo.SetQuantizationOffset(qOffset); + kernelInfo.SetQuantizationScale(qScale); + kernelInfo.SetQuantizationOffset(qOffset); + biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale()); + biasInfo.SetQuantizationOffset(0); + } + + std::vector<T> inputData( + QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), { + 5.0f, -2.0f, 2.5f, 0.0f, 1.0f, + -3.0f, 3.2f, 5.0f, 2.0f, 3.0f, + })); + + std::vector<T> kernelData( + QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f, + 0.0f, 2.0f, -1.5f, + + 0.0f, 0.0f, 0.0f, + 0.2f, 0.2f, 0.2f, + + 0.5f, 0.0f, 0.5f, + 0.0f, -1.0f, 0.0f + })); + + std::vector<B> biasData( + QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f + })); + + std::vector<T> outputData( + QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), { + 4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f, + -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f, + 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f + })); + + // Optionally apply bias to output image + if(biasEnabled) + { + ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), + biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), + 1, outputSize); + } + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo); + armnn::ScopedCpuTensorHandle biasTensor(biasInfo); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); + AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); + + AddInputToWorkload(data, info, inputInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = stride; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = padSize; + data.m_Parameters.m_PadBottom = padSize; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + + workload->Execute(); + + // output + LayerTestResult<T,4> ret(outputInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData); + return ret; +} + + + +template<typename T> +LayerTestResult<T,4> CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + + 
unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = 2; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; + unsigned int biasShape[] = {outputChannels}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); + kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>()); + biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>()); + + LayerTestResult<T,4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908); + auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234); + auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::Convolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + 
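The output dimensions in the comparison tests above and below follow the usual convolution size formula; a minimal standalone sketch of that arithmetic is shown here (the ConvOutputDim helper name is illustrative only, not part of these test files):

// outDim = (inDim + 2*pad - kernel)/stride + 1, written in the same
// (inDim + 2*pad - kernel + stride) / stride form used by the tests.
constexpr unsigned int ConvOutputDim(unsigned int inDim, unsigned int pad,
                                     unsigned int kernel, unsigned int stride)
{
    return (inDim + 2 * pad - kernel + stride) / stride;
}

// Values from CompareConvolution2dTestImpl: 8x16 input, 3x3 kernel, pad 1, strideY 3, strideX 2.
static_assert(ConvOutputDim(8, 1, 3, 3) == 3, "expected outputHeight");
static_assert(ConvOutputDim(16, 1, 3, 2) == 8, "expected outputWidth");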
+template<typename T> +LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int channelMultiplier = 1; + + unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = inputChannels * channelMultiplier; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + unsigned int kernelShape[] = { channelMultiplier, inputChannels, kernelHeight, kernelWidth }; + unsigned int biasShape[] = { outputChannels }; + + float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0; + float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0; + int32_t qOffset = 0; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>(), inputsQScale, qOffset); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>(), outputQScale, qOffset); + kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>(), inputsQScale, qOffset); + biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetBiasDataType(armnn::GetDataType<T>()), inputsQScale, qOffset); + + LayerTestResult<T, 4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f); + auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f); + auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(biasDesc, 1028, 0.0f, 255.0f); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, 
refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} diff --git a/src/armnn/backends/test/CreateWorkloadCl.cpp b/src/armnn/backends/test/CreateWorkloadCl.cpp new file mode 100644 index 0000000000..3f320d80e9 --- /dev/null +++ b/src/armnn/backends/test/CreateWorkloadCl.cpp @@ -0,0 +1,356 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "backends/ClWorkloadFactory.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/MemCopyWorkload.hpp" +#include "backends/ClWorkloadUtils.hpp" +#include "backends/ClWorkloads.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "test/CreateWorkloadClNeon.hpp" + +boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, + std::initializer_list<unsigned int> expectedDimensions) +{ + return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions); +} + +BOOST_AUTO_TEST_SUITE(CreateWorkloadCl) + +BOOST_AUTO_TEST_CASE(CreateActivationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateActivationWorkloadTest<ClActivationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest) + ActivationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1})); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateAdditionWorkloadTest<ClAdditionFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + AdditionQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = 
CreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 1, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3, 1, 1})); +} + +template <typename Convolution2dWorkloadType> +static void Convolution2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 8, 16})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 2, 10})); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat32Workload) +{ + Convolution2dWorkloadTest<ClConvolution2dFloat32Workload>(); +} + + +template <typename Convolution2dWorkloadType> +static void DirectConvolution2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateDirectConvolution2dWorkloadTest<Convolution2dWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest) + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6})); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat32Workload) +{ + DirectConvolution2dWorkloadTest<ClConvolution2dFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload) +{ + DirectConvolution2dWorkloadTest<ClConvolution2dUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = + CreateFullyConnectedWorkloadTest<ClFullyConnectedFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7})); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = + CreateMultiplicationWorkloadTest<ClMultiplicationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we 
expect them (see definition of CreateMultiplicationWorkloadTest) + MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateNormalizationWorkloadTest<ClNormalizationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 5, 5, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 5, 5, 1})); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreatePooling2dWorkloadTest<ClPooling2dFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 5, 5})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 2, 4})); +} + +template <typename ReshapeWorkloadType> +static void ClCreateReshapeWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL. 
+}
+
+BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload)
+{
+    ClCreateReshapeWorkloadTest<ClReshapeFloat32Workload>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
+{
+    ClCreateReshapeWorkloadTest<ClReshapeUint8Workload>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload)
+{
+    Graph graph;
+    ClWorkloadFactory factory;
+    factory.LoadOpenClRuntime();
+
+    auto workload = CreateSoftmaxWorkloadTest<ClSoftmaxFloat32Workload>(factory, graph);
+
+    // check that inputs/outputs are as we expect them (see definition of ClSoftmaxFloat32Workload)
+    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
+}
+
+BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
+{
+    Graph graph;
+    ClWorkloadFactory factory;
+    factory.LoadOpenClRuntime();
+
+    auto workload = CreateSplitterWorkloadTest<ClSplitterFloat32Workload>(factory, graph);
+
+    // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest)
+    SplitterQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {7}));
+    auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {4}));
+    auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {1}));
+    auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2}));
+}
+
+BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
+{
+    // Test that it is possible to decide which output of the splitter layer
+    // should be linked to which input of the merger layer.
+    // We test that it is possible to specify the 0th output
+    // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be the 0th input
+    // of the merger.
+
+    Graph graph;
+    ClWorkloadFactory factory;
+    factory.LoadOpenClRuntime();
+
+    auto workloads =
+        CreateSplitterMergerWorkloadTest<ClSplitterFloat32Workload, ClMergerFloat32Workload>(factory, graph);
+
+    auto wlSplitter = std::move(workloads.first);
+    auto wlMerger = std::move(workloads.second);
+
+    //check that the index of inputs/outputs matches what we declared on InputDescriptor construction.
+    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
+    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
+    armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
+    armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
+
+    BOOST_TEST(sOut0);
+    BOOST_TEST(sOut1);
+    BOOST_TEST(mIn0);
+    BOOST_TEST(mIn1);
+
+    // Flipped order of inputs/outputs.
+    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
+    BOOST_TEST(validDataPointers);
+
+
+    // Also make sure that the inputs are sub-tensors of one tensor and the outputs are sub-tensors of another tensor.
+    bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
+                                    && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
+
+    BOOST_TEST(validSubTensorParents);
+}
+
+BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
+{
+    // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
+    // We create a splitter with two outputs; each of those outputs is used by two different activation layers.
+
+    Graph graph;
+    ClWorkloadFactory factory;
+    std::unique_ptr<ClSplitterFloat32Workload> wlSplitter;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv0_0;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv0_1;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv1_0;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv1_1;
+
+    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterFloat32Workload,
+        ClActivationFloat32Workload>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1);
+
+    //check that the index of inputs/outputs matches what we declared on InputDescriptor construction.
+ armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) +{ + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + CreateMemCopyWorkloads<CopyFromCpuToClWorkload,CopyFromClToCpuWorkload,IClTensorHandle>(factory); +} + +BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateL2NormalizationWorkloadTest<ClL2NormalizationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 5, 20, 50, 67 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 5, 20, 50, 67 })); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadNeon.cpp b/src/armnn/backends/test/CreateWorkloadNeon.cpp new file mode 100644 index 0000000000..807937ba2b --- /dev/null +++ b/src/armnn/backends/test/CreateWorkloadNeon.cpp @@ -0,0 +1,302 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/NeonWorkloadUtils.hpp" +#include "backends/NeonWorkloads.hpp" +#include "backends/MemCopyWorkload.hpp" +#include "backends/NeonTensorHandle.hpp" + +#include "test/CreateWorkloadClNeon.hpp" + +BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon) + +namespace +{ + +bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo) +{ + using namespace armnn::armcomputetensorutils; + + const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info(); + const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo); + + if (handleInfo->data_type() != expectedAclInfo.data_type()) + { + return false; + } + + if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions()) + { + return false; + } + + if (handleInfo->quantization_info() != expectedAclInfo.quantization_info()) + { + return false; + } + + for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d) + { + if (handleInfo->dimension(d) != expectedAclInfo.dimension(d)) + { + return false; + } + } + + return true; +} + +} // namespace + +BOOST_AUTO_TEST_CASE(CreateActivationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateActivationWorkloadTest<NeonActivationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest) + ActivationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateAdditionWorkloadTest<NeonAdditionFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + AdditionQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType::Float32))); + 
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateFullyConnectedWorkloadTest<NeonFullyConnectedFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateMultiplicationWorkloadTest<NeonMultiplicationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateNormalizationWorkloadTest<NeonNormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = 
CreatePooling2dWorkloadTest<NeonPooling2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType::Float32))); +} + +template <typename ReshapeWorkloadType> +static void NeonCreateReshapeWorkloadTest(DataType dataType) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, dataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, dataType))); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) +{ + NeonCreateReshapeWorkloadTest<NeonReshapeFloat32Workload>(DataType::Float32); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + NeonCreateReshapeWorkloadTest<NeonReshapeUint8Workload>(DataType::QuantisedAsymm8); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateSoftmaxWorkloadTest<NeonSoftmaxFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest) + SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateSplitterWorkloadTest<NeonSplitterFloat32Workload>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 7}, DataType::Float32))); + auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 4}, DataType::Float32))); + auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({1, 1}, DataType::Float32))); + auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({1, 2}, 
DataType::Float32)));
+}
+
+BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
+{
+    // Test that it is possible to decide which output of the splitter layer
+    // should be linked to which input of the merger layer.
+    // We test that it is possible to specify the 0th output
+    // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be the 0th input
+    // of the merger.
+
+    Graph graph;
+    NeonWorkloadFactory factory;
+
+    auto workloads =
+        CreateSplitterMergerWorkloadTest<NeonSplitterFloat32Workload, NeonMergerFloat32Workload>(factory, graph);
+
+    auto wlSplitter = std::move(workloads.first);
+    auto wlMerger = std::move(workloads.second);
+
+    //check that the index of inputs/outputs matches what we declared on InputDescriptor construction.
+    armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
+    armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
+    armnn::INeonTensorHandle* mIn0 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
+    armnn::INeonTensorHandle* mIn1 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
+
+    BOOST_TEST(sOut0);
+    BOOST_TEST(sOut1);
+    BOOST_TEST(mIn0);
+    BOOST_TEST(mIn1);
+
+    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
+
+    BOOST_TEST(validDataPointers);
+}
+
+BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
+{
+    // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
+    // We create a splitter with two outputs; each of those outputs is used by two different activation layers.
+
+    Graph graph;
+    NeonWorkloadFactory factory;
+    std::unique_ptr<NeonSplitterFloat32Workload> wlSplitter;
+    std::unique_ptr<NeonActivationFloat32Workload> wlActiv0_0;
+    std::unique_ptr<NeonActivationFloat32Workload> wlActiv0_1;
+    std::unique_ptr<NeonActivationFloat32Workload> wlActiv1_0;
+    std::unique_ptr<NeonActivationFloat32Workload> wlActiv1_1;
+
+    CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterFloat32Workload,
+        NeonActivationFloat32Workload>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1);
+
+    armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
+    armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
+    armnn::INeonTensorHandle* activ0_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
+    armnn::INeonTensorHandle* activ0_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
+    armnn::INeonTensorHandle* activ1_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
+    armnn::INeonTensorHandle* activ1_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
+
+
+    BOOST_TEST(sOut0);
+    BOOST_TEST(sOut1);
+    BOOST_TEST(activ0_0Im);
+    BOOST_TEST(activ0_1Im);
+    BOOST_TEST(activ1_0Im);
+    BOOST_TEST(activ1_1Im);
+
+    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
+                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
+
+    BOOST_TEST(validDataPointers);
+}
+
+BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
+{
+    NeonWorkloadFactory factory;
+    CreateMemCopyWorkloads<CopyFromCpuToNeonWorkload,CopyFromNeonToCpuWorkload,INeonTensorHandle>(factory);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/backends/test/CreateWorkloadRef.cpp
b/src/armnn/backends/test/CreateWorkloadRef.cpp new file mode 100644 index 0000000000..e0eacebe1a --- /dev/null +++ b/src/armnn/backends/test/CreateWorkloadRef.cpp @@ -0,0 +1,414 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "backends/RefWorkloadFactory.hpp" +#include "backends/RefWorkloads.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "test/CreateWorkload.hpp" + +namespace +{ + +template<typename Workload> +void CheckInputOutput(std::unique_ptr<Workload> workload, const TensorInfo& inputInfo, const TensorInfo& outputInfo) +{ + auto queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST((inputHandle->GetTensorInfo() == inputInfo)); + BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); +} + +template <typename Workload> +void CheckInputsOutput(std::unique_ptr<Workload> workload, + const TensorInfo& inputInfo0, + const TensorInfo& inputInfo1, + const TensorInfo& outputInfo) +{ + auto queueDescriptor = workload->GetData(); + auto inputHandle0 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle1 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST((inputHandle0->GetTensorInfo() == inputInfo0)); + BOOST_TEST((inputHandle1->GetTensorInfo() == inputInfo1)); + BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); +} +} + +BOOST_AUTO_TEST_SUITE(CreateWorkloadRef) + +template <typename ActivationWorkloadType> +static void RefCreateActivationWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateActivationWorkloadTest<ActivationWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateActivationWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({ 1, 1 }, ActivationWorkloadType::ms_DataType), + TensorInfo({ 1, 1 }, ActivationWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) +{ + RefCreateActivationWorkloadTest<RefActivationFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateActivationUint8Workload) +{ + RefCreateActivationWorkloadTest<RefActivationUint8Workload>(); +} + +template <typename AdditionWorkloadType> +static void RefCreateAdditionWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateAdditionWorkloadTest<AdditionWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + CheckInputsOutput(std::move(workload), + TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) +{ + RefCreateAdditionWorkloadTest<RefAdditionFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionUint8Workload) +{ + RefCreateAdditionWorkloadTest<RefAdditionUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateBatchNormalizationWorkloadTest<RefBatchNormalizationFloat32Workload>(factory, graph); + + // check that 
outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + CheckInputOutput( + std::move(workload), TensorInfo({2, 3, 1, 1}, DataType::Float32), TensorInfo({2, 3, 1, 1}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest<RefConvolution2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({2, 3, 8, 16}, DataType::Float32), + TensorInfo({2, 2, 2, 10}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolution2dWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = + CreateDepthwiseConvolution2dWorkloadTest<RefDepthwiseConvolution2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({2, 3, 8, 16}, DataType::Float32), + TensorInfo({2, 9, 2, 10}, DataType::Float32)); +} + +template <typename FullyConnectedWorkloadType> +static void RefCreateFullyConnectedWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + float inputsQScale = FullyConnectedWorkloadType::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = FullyConnectedWorkloadType::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0; + CheckInputOutput(std::move(workload), + TensorInfo({ 3, 1, 4, 5 }, FullyConnectedWorkloadType::ms_DataType, inputsQScale), + TensorInfo({ 3, 7 }, FullyConnectedWorkloadType::ms_DataType, outputQScale)); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload) +{ + RefCreateFullyConnectedWorkloadTest<RefFullyConnectedFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedUint8Workload) +{ + RefCreateFullyConnectedWorkloadTest<RefFullyConnectedUint8Workload>(); +} + +template <typename MultiplicationWorkloadType> +static void RefCreateMultiplicationWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateMultiplicationWorkloadTest<MultiplicationWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + CheckInputsOutput(std::move(workload), + TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) +{ + RefCreateMultiplicationWorkloadTest<RefMultiplicationFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload) +{ + RefCreateMultiplicationWorkloadTest<RefMultiplicationUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateNormalizationWorkloadTest<RefNormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({3, 5, 5, 1}, DataType::Float32), + TensorInfo({3, 5, 5, 1}, DataType::Float32)); +} + +template 
<typename Pooling2dWorkloadType> +static void RefCreatePooling2dWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreatePooling2dWorkloadTest<Pooling2dWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({3, 2, 5, 5}, Pooling2dWorkloadType::ms_DataType), + TensorInfo({3, 2, 2, 4}, Pooling2dWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) +{ + RefCreatePooling2dWorkloadTest<RefPooling2dFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) +{ + RefCreatePooling2dWorkloadTest<RefPooling2dUint8Workload>(); +} + +template <typename SoftmaxWorkloadType> +static void RefCreateSoftmaxWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({4, 1}, SoftmaxWorkloadType::ms_DataType), + TensorInfo({4, 1}, SoftmaxWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload) +{ + RefCreateSoftmaxWorkloadTest<RefSoftmaxFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxUint8Workload) +{ + RefCreateSoftmaxWorkloadTest<RefSoftmaxUint8Workload>(); +} + +template <typename SplitterWorkloadType> +static void RefCreateSplitterWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateSplitterWorkloadTest<SplitterWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); + BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 1, 7 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle0 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 4 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle1 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[1]); + BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 1, 1 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[2]); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 1, 2 }, SplitterWorkloadType::ms_DataType))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) +{ + RefCreateSplitterWorkloadTest<RefSplitterFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterUint8Workload) +{ + RefCreateSplitterWorkloadTest<RefSplitterUint8Workload>(); +} + +template <typename SplitterWorkloadType, typename MergerWorkloadType> +static void RefCreateSplitterMergerWorkloadTest() +{ + // Test that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer + // We test that is is possible to specify 0th output + // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input + // of the merger. 
+ + Graph graph; + RefWorkloadFactory factory; + auto workloads = CreateSplitterMergerWorkloadTest<SplitterWorkloadType, MergerWorkloadType>(factory, graph); + + auto wlSplitter = std::move(workloads.first); + auto wlMerger = std::move(workloads.second); + + //check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + armnn::CpuTensorHandle* sOut0 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::CpuTensorHandle* sOut1 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::CpuTensorHandle* mIn0 = dynamic_cast<armnn::CpuTensorHandle*>(wlMerger->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* mIn1 = dynamic_cast<armnn::CpuTensorHandle*>(wlMerger->GetData().m_Inputs[1]); + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(mIn0); + BOOST_TEST(mIn1); + + bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat32) +{ + RefCreateSplitterMergerWorkloadTest<RefSplitterFloat32Workload, RefMergerFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerUint8) +{ + RefCreateSplitterMergerWorkloadTest<RefSplitterUint8Workload, RefMergerUint8Workload>(); +} + +template <typename SplitterWorkloadType, typename ActivationWorkloadType> +static void RefCreateSingleOutputMultipleInputsTest() +{ + // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We create a splitter with two outputs. That each of those outputs is used by two different activation layers + + Graph graph; + RefWorkloadFactory factory; + std::unique_ptr<SplitterWorkloadType> wlSplitter; + std::unique_ptr<ActivationWorkloadType> wlActiv0_0; + std::unique_ptr<ActivationWorkloadType> wlActiv0_1; + std::unique_ptr<ActivationWorkloadType> wlActiv1_0; + std::unique_ptr<ActivationWorkloadType> wlActiv1_1; + + CreateSplitterMultipleInputsOneOutputWorkloadTest<SplitterWorkloadType, + ActivationWorkloadType>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + + armnn::CpuTensorHandle* sOut0 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::CpuTensorHandle* sOut1 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::CpuTensorHandle* activ0_0Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ0_1Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ1_0Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ1_1Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsFloat32) +{ + RefCreateSingleOutputMultipleInputsTest<RefSplitterFloat32Workload, RefActivationFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsUint8) +{ + RefCreateSingleOutputMultipleInputsTest<RefSplitterUint8Workload, RefActivationUint8Workload>(); +} + +template <typename ResizeBilinearWorkloadType> +static void 
RefCreateResizeBilinearTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateResizeBilinearWorkloadTest<ResizeBilinearWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkloadType::ms_DataType), + TensorInfo({ 2, 3, 2, 2 }, ResizeBilinearWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32) +{ + RefCreateResizeBilinearTest<RefResizeBilinearFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateResizeBilinearUint8) +{ + RefCreateResizeBilinearTest<RefResizeBilinearUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat32) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateL2NormalizationWorkloadTest<RefL2NormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({ 5, 20, 50, 67 }, RefL2NormalizationFloat32Workload::ms_DataType), + TensorInfo({ 5, 20, 50, 67 }, RefL2NormalizationFloat32Workload::ms_DataType)); +} + +template <typename ReshapeWorkloadType> +static void RefCreateReshapeWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({ 4, 1 }, ReshapeWorkloadType::ms_DataType), + TensorInfo({ 1, 4 }, ReshapeWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) +{ + RefCreateReshapeWorkloadTest<RefReshapeFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + RefCreateReshapeWorkloadTest<RefReshapeUint8Workload>(); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/FullyConnectedTestImpl.hpp b/src/armnn/backends/test/FullyConnectedTestImpl.hpp new file mode 100644 index 0000000000..479da3fabc --- /dev/null +++ b/src/armnn/backends/test/FullyConnectedTestImpl.hpp @@ -0,0 +1,286 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
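+//
+// Shared implementations of the FullyConnected layer tests (float32, uint8 and large-value variants).
+// LayerTests.cpp includes this header alongside the other *TestImpl.hpp helpers.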
+// + +template<typename T, typename B> +LayerTestResult<T, 2> SimpleFullyConnectedTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + armnn::TensorInfo weightsDesc, + armnn::TensorInfo biasesDesc, + boost::multi_array<T, 2> weights, + boost::multi_array<B, 1> bias, + boost::multi_array<T, 4> input, + bool biasEnabled, + bool transposeWeights) +{ + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::FullyConnectedQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(weightsDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasesDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &weights[0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_TransposeWeightMatrix = transposeWeights; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFullyConnected(data, info); + LayerTestResult<T, 2> result(outputTensorInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get()); + + return result; +} + +LayerTestResult<float, 2> FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled, + bool transposeWeights) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputChannels = 3; + unsigned int outputNum = 2; + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + unsigned int biasShape[] = { outputChannels }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32); + weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32); + biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32); + + LayerTestResult<float, 2> result(outputTensorInfo); + + boost::multi_array<float, 4> input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + }) + ); + + boost::multi_array<float, 2> weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>( + { + .5f, 2.f, .5f, + .5f, 2.f, 1.f, + .5f, 2.f, 2.f, + .5f, 2.f, 3.f, + .5f, 2.f, 4.f + })); + + if (transposeWeights) + { + weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>( + { + .5f, .5f, .5f, .5f, .5f, + 2.f, 2.f, 2.f, 2.f, 2.f, + .5f, 1.f, 2.f, 3.f, 4.f + })); + } + + + std::vector<float> biasValues({0.f, 0.f, 0.f}); + if (biasEnabled) + { + 
biasValues = std::vector<float>({10.f, 20.f, 30.f});
+    }
+    boost::multi_array<float, 1> bias = MakeTensor<float, 1>(biasesDesc, biasValues);
+
+    result = SimpleFullyConnectedTestImpl<float>(
+        workloadFactory,
+        inputTensorInfo, outputTensorInfo,
+        weightsDesc, biasesDesc,
+        weights, bias, input,
+        biasEnabled, transposeWeights
+    );
+
+    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, std::vector<float>(
+        {
+            0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0],
+            2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1],
+            0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2],
+
+            2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0],
+            10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1],
+            2.5f + 4.0f + 6.0f + 6.f + 4.f + biasValues[2]
+        })
+    );
+
+    return result;
+}
+
+LayerTestResult<uint8_t, 2> FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
+{
+    constexpr static unsigned int inputWidth = 3u;
+    constexpr static unsigned int inputHeight = 2u;
+    constexpr static unsigned int inputChannels = 1u;
+
+    constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels;
+
+    constexpr static unsigned int outputChannels = 2u;
+
+    armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, armnn::DataType::QuantisedAsymm8);
+    inputTensorInfo.SetQuantizationScale(0.1f);
+    inputTensorInfo.SetQuantizationOffset(63);
+
+    armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, armnn::DataType::QuantisedAsymm8);
+    outputTensorInfo.SetQuantizationScale(5.f);
+    outputTensorInfo.SetQuantizationOffset(biasEnabled ? -50 : 10);
+
+    armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, armnn::DataType::QuantisedAsymm8);
+    weightsDesc.SetQuantizationScale(0.2f);
+    weightsDesc.SetQuantizationOffset(93);
+
+    armnn::TensorInfo biasesDesc({ outputChannels }, armnn::DataType::Signed32);
+    biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale());
+    biasesDesc.SetQuantizationOffset(0);
+
+    LayerTestResult<uint8_t, 2> result(outputTensorInfo);
+
+    auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>{51, 124, 28,
+                                                                              251, 8, 92});
+
+    auto weights = MakeTensor<uint8_t, 2>(weightsDesc, std::vector<uint8_t>{51, 193, 42, 53, 175, 34,
+                                                                            210, 145, 23, 74, 34, 150});
+
+    // scale = 0.02
+    // offset = 0
+    auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500});
+
+    result = SimpleFullyConnectedTestImpl<uint8_t>(
+        workloadFactory,
+        inputTensorInfo, outputTensorInfo,
+        weightsDesc, biasesDesc,
+        weights, bias, input,
+        biasEnabled, true
+    );
+
+    // manually calculated
+    // note one of these values has been clamped to 0
+    if (biasEnabled)
+    {
+        result.outputExpected = MakeTensor<uint8_t, 2>(outputTensorInfo, std::vector<uint8_t>{0, 242});
+    }
+    else
+    {
+        result.outputExpected = MakeTensor<uint8_t, 2>(outputTensorInfo, std::vector<uint8_t>{0, 32});
+    }
+
+    return result;
+}
+
+
+
+//
+// ArmNN variant of the AndroidNN fully_connected_float_large test.
+//
+// Tests the fully connected layer with large values, optionally transposing weights.
+// Note this is templated for consistency, but the nature of this test makes it unlikely to be useful in Uint8 mode.
+// +template<typename T> +LayerTestResult<T, 2> FullyConnectedLargeTestCommon(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 1; + + unsigned int outputChannels = 1; + unsigned int outputNum = 1; + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + + unsigned int biasShape[] = { outputChannels }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType<T>()); + weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::GetDataType<T>()); + biasesDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 2> result(outputTensorInfo); + + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f, + }) + ); + + boost::multi_array<T, 2> weights = MakeTensor<T, 2>(weightsDesc, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 3.0f, 4.0f, 5.0f, 6.0f + }) + ); + + std::vector<T> biasValues({900000.f}); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasesDesc, biasValues); + + result = SimpleFullyConnectedTestImpl<T>( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + true, transposeWeights + ); + + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 965432.0f, + }) + ); + + return result; +} diff --git a/src/armnn/backends/test/IsLayerSupportedTest.cpp b/src/armnn/backends/test/IsLayerSupportedTest.cpp new file mode 100644 index 0000000000..4b4c9f6099 --- /dev/null +++ b/src/armnn/backends/test/IsLayerSupportedTest.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
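+//
+// Checks that each workload factory's IsLayerSupported() answers are consistent with whether the
+// corresponding workloads can actually be created; the per-layer machinery lives in
+// IsLayerSupportedTestImpl.hpp, included below.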
+// +#include <boost/test/unit_test.hpp> + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include <Layers.hpp> + +#include <string> +#include <iostream> +#include <backends/ClWorkloadFactory.hpp> +#include <backends/NeonWorkloadFactory.hpp> + +#include "IsLayerSupportedTestImpl.hpp" + + +BOOST_AUTO_TEST_SUITE(IsLayerSupported) + +BOOST_AUTO_TEST_CASE(IsLayerSupportedLayerTypeMatches) +{ + LayerTypeMatchesTest(); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::Float32>(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); +} + +#ifdef ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float32>(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); +} +#endif //#ifdef ARMCOMPUTENEON_ENABLED + + +#ifdef ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Cl) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float32>(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Cl) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); +} +#endif //#ifdef ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file diff --git a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp new file mode 100644 index 0000000000..abc9806737 --- /dev/null +++ b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp @@ -0,0 +1,440 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Graph.hpp" + +#include <boost/core/ignore_unused.hpp> + +namespace +{ +armnn::Graph dummyGraph; + +// Make a dummy TensorInfo object +template<armnn::DataType DataType> +armnn::TensorInfo MakeDummyTensorInfo() +{ + return armnn::TensorInfo({2,2,2,2}, DataType); +} + + +// Make a dummy WorkloadInfo using a dummy TensorInfo. +template<armnn::DataType DataType> +armnn::WorkloadInfo MakeDummyWorkloadInfo(unsigned int numInputs, unsigned int numOutputs) +{ + armnn::WorkloadInfo info; + for (unsigned int i=0; i < numInputs; i++) + { + info.m_InputTensorInfos.push_back(MakeDummyTensorInfo<DataType>()); + } + for (unsigned int o=0; o < numOutputs; o++) + { + info.m_OutputTensorInfos.push_back(MakeDummyTensorInfo<DataType>()); + } + return info; +} + +// template class to create a dummy layer (2 parameters) +template<typename LayerType, typename DescType = typename LayerType::DescriptorType> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<LayerType>(DescType(), ""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + LayerType* m_Layer; +}; + +// template class to create a dummy layer (1 parameter) +template<typename LayerType> +struct DummyLayer<LayerType, void> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<LayerType>(""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + LayerType* m_Layer; +}; + +template<> +struct DummyLayer<armnn::ConstantLayer, void> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<armnn::ConstantLayer>(std::shared_ptr<armnn::ScopedCpuTensorHandle>(), ""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::ConstantLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::InputLayer, armnn::LayerBindingId> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<armnn::InputLayer>(armnn::LayerBindingId(), ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::InputLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::MergerLayer> +{ + DummyLayer() + { + armnn::OriginsDescriptor desc(2); + m_Layer = dummyGraph.AddLayer<armnn::MergerLayer>(desc, ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::MergerLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::OutputLayer, armnn::LayerBindingId> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<armnn::OutputLayer>(armnn::LayerBindingId(), ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::OutputLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::SplitterLayer> +{ + DummyLayer() + { + armnn::ViewsDescriptor desc(1); + m_Layer = dummyGraph.AddLayer<armnn::SplitterLayer>(desc, ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::SplitterLayer* m_Layer; +}; + +template <typename ConvolutionLayerType> +struct DummyConvolutionLayer +{ + DummyConvolutionLayer() + { + typename ConvolutionLayerType::DescriptorType desc; + m_Layer = dummyGraph.AddLayer<ConvolutionLayerType>(desc, ""); + m_Layer->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>( + 
armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Bias = std::make_unique<armnn::ScopedCpuTensorHandle>( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyConvolutionLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + ConvolutionLayerType* m_Layer; +}; + +template<> +struct DummyLayer<armnn::Convolution2dLayer> + : public DummyConvolutionLayer<armnn::Convolution2dLayer> +{ +}; + +template<> +struct DummyLayer<armnn::DepthwiseConvolution2dLayer> + : public DummyConvolutionLayer<armnn::DepthwiseConvolution2dLayer> +{ +}; + +// Tag for giving LayerType entries a unique strong type each. +template<armnn::LayerType> +struct Tag{}; + +#define DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, descType) \ +template<armnn::DataType DataType> \ +struct LayerTypePolicy<armnn::LayerType::name, DataType> \ +{ \ + using Type = armnn::name##Layer; \ + using Desc = descType; \ + using QueueDesc = armnn::name##QueueDescriptor; \ + constexpr static const char* NameStr = #name; \ + \ + static std::unique_ptr<armnn::IWorkload> MakeDummyWorkload(armnn::IWorkloadFactory *factory, \ + unsigned int nIn, unsigned int nOut) \ + { \ + QueueDesc desc; \ + armnn::WorkloadInfo info = MakeDummyWorkloadInfo<DataType>(nIn, nOut); \ + return factory->Create##name(desc, info); \ + } \ +}; + +// define a layer policy specialization for use with the IsLayerSupported tests. +// Use this version for layers whose constructor takes 1 parameter(name). +#define DECLARE_LAYER_POLICY_1_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, void) + +// define a layer policy specialization for use with the IsLayerSupported tests. +// Use this version for layers whose constructor takes 2 parameters(descriptor and name). +#define DECLARE_LAYER_POLICY_2_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, armnn::name##Descriptor) + +// Layer policy template +template<armnn::LayerType Type, armnn::DataType DataType> +struct LayerTypePolicy; + +// Every entry in the armnn::LayerType enum must be accounted for below. 
+DECLARE_LAYER_POLICY_2_PARAM(Activation) + +DECLARE_LAYER_POLICY_1_PARAM(Addition) + +DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization) + +DECLARE_LAYER_POLICY_1_PARAM(Constant) + +DECLARE_LAYER_POLICY_2_PARAM(Convolution2d) + +DECLARE_LAYER_POLICY_1_PARAM(MemCopy) + +DECLARE_LAYER_POLICY_2_PARAM(DepthwiseConvolution2d) + +DECLARE_LAYER_POLICY_2_PARAM(FakeQuantization) + +DECLARE_LAYER_POLICY_1_PARAM(Floor) + +DECLARE_LAYER_POLICY_2_PARAM(FullyConnected) + +DECLARE_LAYER_POLICY_CUSTOM_PARAM(Input, armnn::LayerBindingId) + +DECLARE_LAYER_POLICY_1_PARAM(L2Normalization) + +DECLARE_LAYER_POLICY_2_PARAM(Merger) + +DECLARE_LAYER_POLICY_1_PARAM(Multiplication) + +DECLARE_LAYER_POLICY_2_PARAM(Normalization) + +DECLARE_LAYER_POLICY_CUSTOM_PARAM(Output, armnn::LayerBindingId) + +DECLARE_LAYER_POLICY_2_PARAM(Permute) + +DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) + +DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) + +DECLARE_LAYER_POLICY_2_PARAM(Softmax) + +DECLARE_LAYER_POLICY_2_PARAM(Splitter) + +DECLARE_LAYER_POLICY_2_PARAM(Reshape) + + +// Generic implementation to get the number of input slots for a given layer type; +template<armnn::LayerType Type> +unsigned int GetNumInputs(const armnn::Layer& layer) +{ + return layer.GetNumInputSlots(); +} + +// Generic implementation to get the number of output slots for a given layer type; +template<armnn::LayerType Type> +unsigned int GetNumOutputs(const armnn::Layer& layer) +{ + return layer.GetNumOutputSlots(); +} + +template<> +unsigned int GetNumInputs<armnn::LayerType::Merger>(const armnn::Layer& layer) +{ + boost::ignore_unused(layer); + return 2; +} + +// Test that the IsLayerSupported() function returns the correct value. +// We determine the correct value by *trying* to create the relevant workload and seeing if it matches what we expect. +// Returns true if expectations are met, otherwise returns false. 
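+// For example, if IsLayerSupported() reports support, creating the dummy workload is expected to succeed
+// (or to throw InvalidArgumentException for these dummy shapes); if it reports no support, creation is
+// expected to fail or to throw InvalidArgumentException/UnimplementedException. The catch blocks below
+// encode these expectations.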
+template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> +bool IsLayerSupportedTest(FactoryType *factory, Tag<Type>) +{ + using LayerPolicy = LayerTypePolicy<Type, DataType>; + using LayerType = typename LayerPolicy::Type; + using LayerDesc = typename LayerPolicy::Desc; + DummyLayer<LayerType, LayerDesc> layer; + + unsigned int numIn = GetNumInputs<Type>(*layer.m_Layer); + unsigned int numOut = GetNumOutputs<Type>(*layer.m_Layer); + + // Make another dummy layer just to make IsLayerSupported have valid inputs + DummyLayer<armnn::ConstantLayer, void> previousLayer; + // Set output of previous layer to a dummy tensor + armnn::TensorInfo output = MakeDummyTensorInfo<DataType>(); + previousLayer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); + // Connect all outputs of previous layer to inputs of tested layer + for (unsigned int i = 0; i < numIn; i++) + { + armnn::IOutputSlot& previousLayerOutputSlot = previousLayer.m_Layer->GetOutputSlot(0); + armnn::IInputSlot& layerInputSlot = layer.m_Layer->GetInputSlot(i); + previousLayerOutputSlot.Connect(layerInputSlot); + } + // Set outputs of tested layer to a dummy tensor + for (unsigned int i = 0; i < numOut; i++) + { + layer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); + } + + std::string layerName = LayerPolicy::NameStr; + std::string reasonIfUnsupported; + if (FactoryType::IsLayerSupported(*layer.m_Layer, DataType, reasonIfUnsupported)) + { + std::string errorMsg = " layer expected support but found none."; + try + { + bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() != nullptr; + BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + return retVal; + } + catch (const armnn::InvalidArgumentException& e) + { + boost::ignore_unused(e); + // This is ok since we throw InvalidArgumentException when creating the dummy workload. + return true; + } + catch(const std::exception& e) + { + errorMsg = e.what(); + BOOST_TEST_ERROR(layerName << ": " << errorMsg); + return false; + } + catch (...) + { + errorMsg = "Unexpected error while testing support for "; + BOOST_TEST_ERROR(errorMsg << layerName); + return false; + } + } + else + { + std::string errorMsg = "layer expected no support (giving reason: " + reasonIfUnsupported + ") but found some."; + try + { + bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() == nullptr; + BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + return retVal; + } + // These two exceptions are ok: For workloads that are partially supported, attempting to instantiate them + // using parameters that make IsLayerSupported() return false should throw an + // InvalidArgumentException or UnimplementedException + catch(const armnn::InvalidArgumentException& e) + { + boost::ignore_unused(e); + return true; + } + catch (const armnn::UnimplementedException& e) + { + boost::ignore_unused(e); + return true; + } + catch(const std::exception& e) + { + errorMsg = e.what(); + BOOST_TEST_ERROR(layerName << ": " << errorMsg); + return false; + } + catch (...) 
+    {
+        errorMsg = "Unexpected error while testing support for ";
+        BOOST_TEST_ERROR(errorMsg << layerName);
+        return false;
+    }
+    }
+}
+
+// Helper function to compute the next type in the LayerType enum
+constexpr armnn::LayerType NextType(armnn::LayerType type)
+{
+    return static_cast<armnn::LayerType>(static_cast<int>(type)+1);
+}
+
+// Termination function for determining the end of the LayerType enumeration
+template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type>
+bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag<armnn::LayerType::LastLayer>)
+{
+    return IsLayerSupportedTest<FactoryType, DataType, Type>(factory, Tag<Type>());
+};
+
+// Recursive function to test an entry in the LayerType enum and then iterate to the next entry.
+template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type>
+bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag<Type>)
+{
+    bool v = IsLayerSupportedTest<FactoryType, DataType, Type>(factory, Tag<Type>());
+
+    return v &&
+           IsLayerSupportedTestsImpl<FactoryType, DataType, NextType(Type)>
+               (factory, Tag<NextType(Type)>());
+};
+
+// Helper function to pass through to the test framework.
+template<typename FactoryType, armnn::DataType DataType>
+bool IsLayerSupportedTests(FactoryType *factory)
+{
+    return IsLayerSupportedTestsImpl<FactoryType, DataType>(factory, Tag<armnn::LayerType::FirstLayer>());
+};
+
+template<armnn::LayerType Type>
+bool TestLayerTypeMatches()
+{
+    using LayerPolicy = LayerTypePolicy<Type, armnn::DataType::Float32>;
+    using LayerType = typename LayerPolicy::Type;
+    using LayerDesc = typename LayerPolicy::Desc;
+    DummyLayer<LayerType, LayerDesc> layer;
+
+    std::stringstream ss;
+    ss << LayerPolicy::NameStr << " layer type mismatches expected layer type value.";
+    bool v = Type == layer.m_Layer->GetType();
+    BOOST_CHECK_MESSAGE(v, ss.str());
+    return v;
+};
+
+template<armnn::LayerType Type>
+bool LayerTypeMatchesTestImpl(Tag<armnn::LayerType::LastLayer>)
+{
+    return TestLayerTypeMatches<Type>();
+};
+
+template<armnn::LayerType Type>
+bool LayerTypeMatchesTestImpl(Tag<Type>)
+{
+    return TestLayerTypeMatches<Type>() &&
+           LayerTypeMatchesTestImpl<NextType(Type)>(Tag<NextType(Type)>());
+};
+
+bool LayerTypeMatchesTest()
+{
+    return LayerTypeMatchesTestImpl<armnn::LayerType::FirstLayer>(Tag<armnn::LayerType::FirstLayer>());
+};
+
+} //namespace
diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp
new file mode 100644
index 0000000000..76681f9a93
--- /dev/null
+++ b/src/armnn/backends/test/LayerTests.cpp
@@ -0,0 +1,3884 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
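+//
+// Definitions of the layer test functions declared in LayerTests.hpp; most of them simply forward to the
+// per-layer *TestImpl.hpp helpers included below.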
+// +#include "LayerTests.hpp" + +#include "test/TensorHelpers.hpp" +#include "TensorCopyUtils.hpp" + +#include <boost/test/unit_test.hpp> + +#include "armnn/LayerSupport.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#endif + +#include <algorithm> +#include <boost/cast.hpp> + +#include "WorkloadTestUtils.hpp" +#include "Conv2dTestImpl.hpp" +#include "BatchNormTestImpl.hpp" +#include "ActivationTestImpl.hpp" +#include "Pooling2dTestImpl.hpp" +#include "ReshapeTestImpl.hpp" +#include "FullyConnectedTestImpl.hpp" +#include "SplitterTestImpl.hpp" +#include "SoftmaxTestImpl.hpp" +#include "NormTestImpl.hpp" +#include "PermuteTestImpl.hpp" + +// 3-channel 16x8 image used as common input data for a number of Conv2d tests +static std::vector<float> ConvInput3x8x16({ + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}); + +// 2-channel bias used by a number of Conv2d tests +static std::vector<float> Bias2({0, 2}); + +// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled +template<typename T> +boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset) +{ + if(biasEnabled) + { + armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, armnn::GetDataType<T>()); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2)); + return bias; + } + else + { + return boost::multi_array<T, 1>(); + } +} + +template<typename T> +LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use common single-batch 3-channel 16x8 image + armnn::TensorInfo inputDesc({1, 3, 8, 16}, 
armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch with 3-channel 3x5 kernels + armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 2 batch elements of a 1-channel 14x4 image + armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, + -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset); +} + +template<typename T> +LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path + + // Use common single-batch 3-channel 16x8 image + armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch of 3-channel 3x3 kernels + armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 1 batch of a 2-channel 14x6 image + armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + 
-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x3TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x3TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +template<typename T> +LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon( + armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 3x3 image as input + armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11,21,31, + 12,22,32, + 13,23,33 + }))); + + // Use 1 batch of a 1-channel 2x2 kernel + armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -11,-21, + -12,-22, + }))); + +// Expected output is 1 batch of a 1-channel 6x8 image +// Manually calculated like this: +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..] +//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..] +//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..] +//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..] 
+ armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 0, 0, 0, 0, 0, 0, + -242, -594, -934, -372, 0, 0, + -495, -1190, -1850, -725, 0, 0, + -538, -1256, -1916, -748, 0, 0, + -273, -626, -946, -363, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // padding left + 2, // padding top + 3, // padding right + 4); // padding bottom +} + +template<typename T> +LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 5x5 image as input + armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11,21,31,41,51, + 12,22,32,42,52, + 13,23,33,43,53, + 14,24,34,44,54, + 15,25,35,45,55, + }))); + + // Use 1 batch of a 1-channel 4x4 kernel + armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -11,-21,-31,-41, + -12,-22,-32,-42, + -13,-23,-33,-43, + -14,-24,-34,-44, + }))); + + // Expected output is 1 batch of a 1-channel 5x5 image + armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>()); + std::vector<T> myVec(outputDesc.GetNumElements(), 0); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -4723, -7044, -9324, -6253, -3542, + -7140, -10580, -13940, -9300, -5230, + -9590, -14120, -18520, -12290, -6860, + -9980, -14560, -18960, -12560, -7000, + -7518, -10904, -14144, -9318, -5152, + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // padding left + 2, // padding top + 2, // padding right + 1); // padding bottom +} + +LayerTestResult<float, 4> +Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleConvolution2dAsymmetricPaddingTestCommon<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dTestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult<uint8_t, 4> 
DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + return Convolution1dTestImpl<float>(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + return Convolution1dTestImpl<uint8_t>(workloadFactory, 0.1f, 128, biasEnabled); +} + +LayerTestResult<float,4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareConvolution2dTestImpl<float>(workloadFactory, refWorkloadFactory); +} + +template<typename T> +LayerTestResult<T,4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareDepthwiseConvolution2dTestImpl<T>(workloadFactory, refWorkloadFactory); +} + +template LayerTestResult<float, 4> CompareDepthwiseConvolution2dTest<float>( + armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); +template LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dTest<uint8_t>( + armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); + +LayerTestResult<float,4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + auto normChannel = armnn::NormalizationAlgorithmChannel::Across; + return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); +} + +LayerTestResult<float,4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + auto normChannel = armnn::NormalizationAlgorithmChannel::Within; + return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); +} + +LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + return SimpleSoftmaxTestImpl<float>(workloadFactory, beta); +} + +LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + return SimpleSoftmaxTestImpl<uint8_t>(workloadFactory, beta); +} + +LayerTestResult<float,4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod); +} + +LayerTestResult<float,2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + return CompareSoftmaxTestImpl<float>(workloadFactory, refWorkloadFactory, beta); +} + +LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + return CompareSoftmaxTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, beta); +} + +std::vector<LayerTestResult<float,3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SplitterTestCommon<float>(workloadFactory); +} + +std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SplitterTestCommon<uint8_t>(workloadFactory, 1.0f, 0); +} + 
+LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory) +{ + return CopyViaSplitterTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return CopyViaSplitterTestImpl<uint8_t>(workloadFactory, 1.0f, 0); +} + +LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int outputWidth = 5; + unsigned int outputHeight = 6; + unsigned int outputChannels = 3; + + unsigned int inputWidth1 = 2; + unsigned int inputHeight1 = 2; + unsigned int inputChannels1 = 3; + + unsigned int inputWidth2 = 2; + unsigned int inputHeight2 = 4; + unsigned int inputChannels2 = 3; + + unsigned int inputWidth3 = 3; + unsigned int inputHeight3 = 6; + unsigned int inputChannels3 = 2; + + unsigned int inputWidth4 = 3; + unsigned int inputHeight4 = 6; + unsigned int inputChannels4 = 1; + + // Define the tensor descriptors + armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::Float32); + + LayerTestResult<float,3> ret(outputTensorInfo); + + + ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + + }) + ); + + + auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>( + { + 1.0f, 2.0f, + 6.0f, 7.0f, + + 31.0f, 32.0f, + 36.0f, 37.0f, + + 61.0f, 62.0f, + 66.0f, 67.0f, + }) + ); + + auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>( + { + 11.0f, 12.0f, + 16.0f, 17.0f, + 21.0f, 22.0f, + 26.0f, 27.0f, + + 41.0f, 42.0f, + 46.0f, 47.0f, + 51.0f, 52.0f, + 56.0f, 57.0f, + + 71.0f, 72.0f, + 76.0f, 77.0f, + 81.0f, 82.0f, + 86.0f, 87.0f, + }) + ); + + auto input3 = MakeTensor<float, 3>(inputTensorInfo3, std::vector<float>( + { + 3.0f, 4.0f, 5.0f, + 8.0f, 9.0f, 10.0f, + 13.0f, 14.0f, 15.0f, + 18.0f, 19.0f, 20.0f, + 23.0f, 24.0f, 25.0f, + 28.0f, 29.0f, 30.0f, + + 33.0f, 34.0f, 35.0f, + 38.0f, 39.0f, 40.0f, + 43.0f, 44.0f, 45.0f, + 48.0f, 49.0f, 50.0f, + 53.0f, 54.0f, 55.0f, + 58.0f, 59.0f, 60.0f, + }) + ); + + + auto input4 = MakeTensor<float, 3>(inputTensorInfo4, std::vector<float>( + { + 63.0f, 64.0f, 65.0f, + 68.0f, 69.0f, 70.0f, + 73.0f, 74.0f, 75.0f, + 78.0f, 79.0f, 80.0f, + 83.0f, 84.0f, 85.0f, + 88.0f, 89.0f, 90.0f, + }) + ); + + std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of input[0] + armnn::MergerQueueDescriptor::ViewOrigin 
window1(wOrigin1); + + std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of input[1] + armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of input[2] + armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of input[3] + armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); + + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo1); + + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo2); + + std::unique_ptr<armnn::ITensorHandle> inputHandle3 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo3); + + std::unique_ptr<armnn::ITensorHandle> inputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo4); + + + armnn::MergerQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); + AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + data.m_ViewOrigins.push_back(window3); + data.m_ViewOrigins.push_back(window4); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + inputHandle3->Allocate(); + inputHandle4->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); + CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); + CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 2; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + + auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>( + { + 0.0f, 
2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 1.0f, 2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 1.0f, 2.0f, + + 0.0f, 0.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + })); + + auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>( + { + 1.0f, 2.0f, 1.0f, + 0.0f, 1.0f, 2.0f, + + 1.0f, 2.0f, -2.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 0.0f, -3.0f, + + 0.0f, 0.0f, 1.0f, + 0.7f, 1.0f, 5.0f, + })); + + LayerTestResult<float,4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>( + { + 1.0f, 4.0f, 2.0f, + 0.2f, 2.0f, 4.0f, + + 2.0f, 4.0f, -1.0f, + 0.4f, 2.0f, 4.0f, + + 0.0f, 4.0f, 2.0f, + 8.4f, 1.0f, -1.0f, + + 0.0f, 0.0f, 2.0f, + 0.9f, 2.0f, 7.0f, + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template <typename T> +LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo1.SetQuantizationScale(qScale); + inputTensorInfo1.SetQuantizationOffset(qOffset); + inputTensorInfo2.SetQuantizationScale(qScale); + inputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, + { + 0.0f, + 1.0f, + + 2.0f, + 3.0f, + + 4.0f, + 5.0f, + })); + + auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + })); + + LayerTestResult<T,4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 4.5f, 5.5f, 6.5f, + + 2.5f, 3.5f, 4.5f, + 6.5f, 7.5f, 8.5f, + + 4.5f, 5.5f, 6.5f, + 8.5f, 9.5f, 10.5f, + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + 
AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template <typename T> +LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); + armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo1.SetQuantizationScale(qScale); + inputTensorInfo1.SetQuantizationOffset(qOffset); + inputTensorInfo2.SetQuantizationScale(qScale); + inputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f, + })); + + auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, + })); + + LayerTestResult<T,4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + 6.5f, 7.5f, 8.5f, + 9.5f, 10.5f, 11.5f, + 12.5f, 13.5f, 14.5f, + 15.5f, 16.5f, 17.5f, + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcastTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcastTestImpl<uint8_t>(workloadFactory, 2.f, 0); +} + +LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& 
workloadFactory) +{ + return AdditionBroadcast1ElementTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcast1ElementTestImpl<uint8_t>(workloadFactory, 0.1333333f, 128); +} + +LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int batchSize = 4; + unsigned int channels = 1; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232); + auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456); + + LayerTestResult<float,4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::AdditionQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get()); + SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + inputHandle1Ref->Allocate(); + inputHandle2Ref->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + 
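// A quick hand check of the element-wise product expected by this test: 1*2=2, 2*3=6, 3*4=12 and
+ // 4*5=20, which is exactly the outputExpected data assigned at the end of the function.
+ 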
armnn::TensorInfo inputTensorInfo0; + armnn::TensorInfo inputTensorInfo1; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int shape[] = { batchSize, channelCount, height, width }; + constexpr std::size_t dimensionCount = std::extent<decltype(shape)>::value; + + inputTensorInfo0 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32); + inputTensorInfo1 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32); + + auto input0 = MakeTensor<float, 4>(inputTensorInfo0, std::vector<float>({ + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4 })); + + auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>({ + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 })); + + LayerTestResult<float,4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 2, 2, 2, 2, 6, 6, 6, 6, + 12, 12, 12, 12, 20, 20, 20, 20 })); + + return ret; +} + +LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + const unsigned int width = 16; + const unsigned int height = 32; + const unsigned int channelCount = 2; + const unsigned int batchSize = 5; + + armnn::TensorInfo inputTensorInfo0; + armnn::TensorInfo inputTensorInfo1; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int shape[] = { batchSize, channelCount, height, width }; + + inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + LayerTestResult<float,4> comparisonResult(outputTensorInfo); + + auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992); + auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257); + + std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = 
refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::MultiplicationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get()); + SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + inputHandle0Ref->Allocate(); + inputHandle1Ref->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); + + return comparisonResult; +} + +LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 5; + const unsigned int batchSize = 3; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo tensorInfo; + + constexpr unsigned int shape[] = {batchSize, channels, height, width}; + constexpr unsigned int tensorShape[] = {channels}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32); + + auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312); + + auto mean = MakeRandomTensor<float, 1>(tensorInfo, 123); + auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f); + auto beta = MakeRandomTensor<float, 1>(tensorInfo, 123); + auto gamma = MakeRandomTensor<float, 1>(tensorInfo, 345); + + LayerTestResult<float,4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::BatchNormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle meanTensor(tensorInfo); + armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo); + armnn::ScopedCpuTensorHandle betaTensor(tensorInfo); + 
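// The four constant-parameter handles here feed data.m_Mean/m_Variance/m_Beta/m_Gamma below;
+ // together with m_Eps they drive the usual gamma * (x - mean) / sqrt(variance + eps) + beta
+ // computation that both the tested and the reference workloads are expected to agree on.
+ 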
armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.01f; + + armnn::BatchNormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + inputHandleRef->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +void Concatenate(armnn::IWorkloadFactory& workloadFactory, + std::initializer_list<const armnn::TensorInfo> inputTensorInfos, + std::initializer_list<void*> inputs, + const armnn::TensorInfo& outputTensorInfo, + void* output, + unsigned int concatDim) +{ + armnn::MergerQueueDescriptor queueDescriptor; + + std::vector<armnn::TensorShape> shapes; + shapes.reserve(inputTensorInfos.size()); + for (const armnn::TensorInfo& it: inputTensorInfos) + { + shapes.push_back(it.GetShape()); + } + armnn::OriginsDescriptor viewsDescriptor = armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), + shapes.end(), concatDim); + + queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews()); + for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i) + { + queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i), + viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions())); + } + + const size_t inputCount = inputTensorInfos.size(); + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles; + inputHandles.reserve(inputCount); + + const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + for (unsigned int i = 0; i < inputCount; ++i) + { + const armnn::TensorInfo& inputTensorInfo = inputTensorInfos.begin()[i]; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ? 
+ workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(), + queueDescriptor.m_ViewOrigins[i].m_Origin.data()) + : workloadFactory.CreateTensorHandle(inputTensorInfo); + + inputHandles.emplace_back(std::move(inputHandle)); + } + + armnn::WorkloadInfo workloadInfo; + + for (unsigned int i = 0; i < inputCount; ++i) + { + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos.begin()[i], inputHandles[i].get()); + } + + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo); + + for (auto& inputHandle : inputHandles) + { + inputHandle->Allocate(); + } + + outputHandle->Allocate(); + + unsigned int nextInputId = 0; + for (auto& inputHandle : inputHandles) + { + CopyDataToITensorHandle(inputHandle.get(), *(inputs.begin() + nextInputId++)); + } + + workload->Execute(); + + CopyDataFromITensorHandle(output, outputHandle.get()); +} + +template <typename T> +LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType<T>()); + + auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f })); + auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f })); + auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f })); + + armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 1> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor<T, 1>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f + })); + + return result; +} + +LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation1dTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo& outputTensorInfo, + unsigned int dimension, + const float qScale, + const int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType<T>()); + + auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, + })); + + auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 7.0f, 8.0f, 9.0f, + + // Batch 1 + 16.0f, 17.0f, 18.0f, + })); + + LayerTestResult<T, 2> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + dimension); + + result.output = MakeTensor<T, 2>(outputTensorInfo, 
output); + return result; +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 0, qScale, qOffset); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + + // Batch 2 + 4.0f, 5.0f, 6.0f, + + // Batch 3 + 13.0f, 14.0f, 15.0f, + + // Batch 4 + 7.0f, 8.0f, 9.0f, + + // Batch 5 + 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, + + // Batch 0 + 7.0f, 8.0f, 9.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 1 + 16.0f, 17.0f, 18.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>()); + LayerTestResult<T, 2> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor<T, 2>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + + // Batch 2 + 4.0f, 5.0f, 6.0f, + + // Batch 3 + 13.0f, 14.0f, 15.0f, + + // Batch 4 + 7.0f, 8.0f, 9.0f, + + // Batch 5 + 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return 
Concatenation2dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 9.0f, + + // Batch 1 + 18.0f + })); + + armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>()); + LayerTestResult<T, 2> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 1); + + result.output = MakeTensor<T, 2>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo& outputTensorInfo, + unsigned int dimension, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); + + auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + + // Batch 1, Channel 0 + 25.0f, 26.0f, + + // Batch 1, Channel 1 + 27.0f, 28.0f, + + // Batch 1, Channel 2 + 29.0f, 30.0f + })); + + auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 13.0f, 14.0f, + + // Batch 0, Channel 1 + 15.0f, 16.0f, + + // Batch 0, Channel 2 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, + + // Batch 1, Channel 1 + 33.0f, 34.0f, + + // Batch 1, Channel 2 + 35.0f, 36.0f + })); + + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + dimension); + + 
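// The Dim0/Dim1/Dim2 variants below reuse these same three {2,3,2} inputs and only change the
+ // concatenation axis, so the output grows to {6,3,2}, {2,9,2} or {2,3,6} respectively.
+ 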
result.output = MakeTensor<T, 3>(outputTensorInfo, output); + return result; +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 0, + qScale, qOffset); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 2, Channel 0 + 7.0f, 8.0f, + + // Batch 2, Channel 1 + 9.0f, 10.0f, + + // Batch 2, Channel 2 + 11.0f, 12.0f, + + // Batch 3, Channel 0 + 25.0f, 26.0f, + + // Batch 3, Channel 1 + 27.0f, 28.0f, + + // Batch 3, Channel 2 + 29.0f, 30.0f, + + // Batch 4, Channel 0 + 13.0f, 14.0f, + + // Batch 4, Channel 1 + 15.0f, 16.0f, + + // Batch 4, Channel 2 + 17.0f, 18.0f, + + // Batch 5, Channel 0 + 31.0f, 32.0f, + + // Batch 5, Channel 1 + 33.0f, 34.0f, + + // Batch 5, Channel 2 + 35.0f, 36.0f + })); + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 0, Channel 3 + 7.0f, 8.0f, + + // Batch 0, Channel 4 + 9.0f, 10.0f, + + // Batch 0, Channel 5 + 11.0f, 12.0f, + + // Batch 0, Channel 6 + 13.0f, 14.0f, + + // Batch 0, Channel 7 + 15.0f, 16.0f, + + // Batch 0, Channel 8 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 1, Channel 3 + 25.0f, 26.0f, + + // Batch 1, Channel 4 + 27.0f, 28.0f, + + // Batch 1, Channel 5 + 29.0f, 30.0f, + + // Batch 1, Channel 6 + 31.0f, 32.0f, + + // Batch 1, Channel 7 + 33.0f, 34.0f, + + // Batch 1, Channel 8 + 35.0f, 36.0f + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 2, qScale, qOffset); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, 
25.0f, 26.0f, 31.0f, 32.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f, + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 25.0f, 26.0f, + + // Batch 0, Channel 1 + 27.0f, 28.0f, + + // Batch 0, Channel 2 + 29.0f, 30.0f, + + // Batch 1, Channel 0 + 13.0f, 14.0f, + + // Batch 1, Channel 1 + 15.0f, 16.0f, + + // Batch 1, Channel 2 + 17.0f, 18.0f, + + // Batch 2, Channel 0 + 31.0f, 32.0f, + + // Batch 2, Channel 1 + 33.0f, 34.0f, + + // Batch 2, Channel 2 + 35.0f, 36.0f + })); + + armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>()); + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor<T, 3>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 2, Channel 0 + 7.0f, 8.0f, + + // Batch 2, Channel 1 + 9.0f, 10.0f, + + // Batch 2, Channel 2 + 11.0f, 12.0f, + + // Batch 3, Channel 0 + 25.0f, 26.0f, + + // Batch 3, Channel 1 + 27.0f, 28.0f, + + // Batch 3, Channel 2 + 29.0f, 30.0f, + + // Batch 4, Channel 0 + 13.0f, 14.0f, + + // Batch 4, Channel 1 + 15.0f, 16.0f, + + // Batch 4, Channel 2 + 17.0f, 18.0f, + + // Batch 5, Channel 0 + 31.0f, 32.0f, + + // Batch 5, Channel 1 + 33.0f, 34.0f, + + // Batch 5, Channel 2 + 35.0f, 36.0f + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, 
armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + + // Batch 0, Channel 3 + 25.0f, 26.0f, + + // Batch 1, Channel 0 + 27.0f, 28.0f, + + // Batch 1, Channel 1 + 29.0f, 30.0f, + + // Batch 1, Channel 2 + 13.0f, 14.0f, + + // Batch 1, Channel 3 + 15.0f, 16.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType<T>()); + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 1); + + result.output = MakeTensor<T, 3>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 0, Channel 3 + 7.0f, 8.0f, + + // Batch 0, Channel 4 + 9.0f, 10.0f, + + // Batch 0, Channel 5 + 11.0f, 12.0f, + + // Batch 0, Channel 6 + 25.0f, 26.0f, + + // Batch 0, Channel 7 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 1, Channel 3 + 27.0f, 28.0f, + + // Batch 1, Channel 4 + 29.0f, 30.0f, + + // Batch 1, Channel 5 + 13.0f, 14.0f, + + // Batch 1, Channel 6 + 15.0f, 16.0f, + + // Batch 1, Channel 7 + 31.0f, 32.0f, + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, + + // Batch 0, Channel 1 + 9.0f, + + // Batch 0, Channel 2 + 11.0f, + + // Batch 1, Channel 0 + 25.0f, + + // Batch 1, Channel 1 + 27.0f, + + // Batch 1, Channel 2 + 29.0f + })); + + armnn::TensorInfo 
input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 13.0f, 14.0f, 50.0f, + + // Batch 0, Channel 1 + 15.0f, 16.0f, 51.0f, + + // Batch 0, Channel 2 + 17.0f, 18.0f, 52.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, 53.0f, + + // Batch 1, Channel 1 + 33.0f, 34.0f, 54.0f, + + // Batch 1, Channel 2 + 35.0f, 36.0f, 55.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>()); + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 2); + + result.output = MakeTensor<T, 3>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f, + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 5.0f, + 3.0f, 4.0f, 5.0f, 6.0f, + 4.0f, 5.0f, 6.0f, 7.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& 
workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 255.0f, + 200.0f, 250.f, + })); + + // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, + // then figures out the interpolants and weights. Note this is different to projecting the centre of the + // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value + // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting + // the centre). + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 1.0f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 5.0f, + 3.0f, 4.0f, 5.0f, 6.0f, + 4.0f, 5.0f, 6.0f, 7.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 1.f, 3.f, + 3.f, 5.f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = 
workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 5; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 3; + constexpr unsigned int outputHeight = 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 5.0f, 8.0f, + 13.0f, 21.0f, 34.0f, 55.0f, 89.0f, + 144.0f, 233.0f, 377.0f, 610.0f, 987.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 1.0f, 2.6666f, 6.0f, + 78.5f, 179.3333f, 401.f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 5; + constexpr unsigned int outputHeight = 3; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, + 13.0f, 21.0f, + 144.0f, 233.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 
4>(outputTensorInfo, std::vector<float>({ + 1.0f, 1.4f, 1.8f, 2.f, 2.f, + 13.f, 16.2f, 19.4f, 21.f, 21.f, + 144.f, 179.6f, 215.2f, 233.f, 233.f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int width = 2; + constexpr unsigned int height = 3; + + const armnn::TensorInfo tensorInfo({height, width }, + armnn::DataType::Float32); + auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({ + -10.0f, -5.0f, + 0.0f, 5.0f, + 10.0f, 10.0f + })); + + LayerTestResult<float, 2> ret(tensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + armnn::FakeQuantizationQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); + float min = -10.f; + float max = 10.f; + + data.m_Parameters.m_Min = min; + data.m_Parameters.m_Max = max; + + armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]); + armnn::FakeQuantizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({ + 0.0f, 63.0f, + 128.0f, 191.0f, + 255.0f, 255.0f + })); + return ret; +} + +LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 1; + constexpr unsigned int inputHeight = 1; + constexpr unsigned int inputChannels = 10; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f + })); + + const float 
approxInvL2Norm = 0.050964719f; + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f * approxInvL2Norm, + 2.0f * approxInvL2Norm, + 3.0f * approxInvL2Norm, + 4.0f * approxInvL2Norm, + 5.0f * approxInvL2Norm, + 6.0f * approxInvL2Norm, + 7.0f * approxInvL2Norm, + 8.0f * approxInvL2Norm, + 9.0f * approxInvL2Norm, + 10.0f * approxInvL2Norm + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +namespace +{ + +float CalcInvL2Norm(std::initializer_list<float> elements) +{ + const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f, + [](float acc, float element) { return acc + element * element; }); + return 1.0f / sqrtf(reduction); +} + +} + +LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 5; + constexpr unsigned int inputHeight = 1; + constexpr unsigned int inputChannels = 2; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 3.0f, 5.0f, 7.0f, 9.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f * CalcInvL2Norm({ 1.0f, 2.0f }), + 3.0f * CalcInvL2Norm({ 3.0f, 4.0f }), + 5.0f * CalcInvL2Norm({ 5.0f, 6.0f }), + 7.0f * CalcInvL2Norm({ 7.0f, 8.0f }), + 9.0f * CalcInvL2Norm({ 9.0f, 10.0f }), + + 2.0f * CalcInvL2Norm({ 1.0f, 2.0f }), + 4.0f * CalcInvL2Norm({ 3.0f, 4.0f }), + 6.0f * CalcInvL2Norm({ 5.0f, 6.0f }), + 8.0f * CalcInvL2Norm({ 7.0f, 8.0f }), + 10.0f * CalcInvL2Norm({ 9.0f, 10.0f }) + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + 
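// The expected data above scales each input value by 1/sqrt(sum of squares across the channel
+ // dimension), i.e. by CalcInvL2Norm; e.g. the first element is 1.0f / sqrt(1*1 + 2*2) ~= 0.4472f.
+ 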
inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 2; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + // Channel 0 + 119.0f, 21.0f, 150.0f, + 149.0f, 32.0f, 179.0f, + 15.0f, 227.0f, 141.0f, + 147.0f, 199.0f, 220.0f, + + // Channel 1 + 110.0f, 140.0f, 73.0f, + 211.0f, 212.0f, 89.0f, + 24.0f, 138.0f, 188.0f, + 162.0f, 12.0f, 161.0f, + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 119.0f * CalcInvL2Norm({ 119.0f, 110.0f }), + 21.0f * CalcInvL2Norm({ 21.0f, 140.0f }), + 150.0f * CalcInvL2Norm({ 150.0f, 73.0f }), + 149.0f * CalcInvL2Norm({ 149.0f, 211.0f }), + 32.0f * CalcInvL2Norm({ 32.0f, 212.0f }), + 179.0f * CalcInvL2Norm({ 179.0f, 89.0f }), + 15.0f * CalcInvL2Norm({ 15.0f, 24.0f }), + 227.0f * CalcInvL2Norm({ 227.0f, 138.0f }), + 141.0f * CalcInvL2Norm({ 141.0f, 188.0f }), + 147.0f * CalcInvL2Norm({ 147.0f, 162.0f }), + 199.0f * CalcInvL2Norm({ 199.0f, 12.0f }), + 220.0f * CalcInvL2Norm({ 220.0f, 161.0f }), + + 110.0f * CalcInvL2Norm({ 119.0f, 110.0f }), + 140.0f * CalcInvL2Norm({ 21.0f, 140.0f }), + 73.0f * CalcInvL2Norm({ 150.0f, 73.0f }), + 211.0f * CalcInvL2Norm({ 149.0f, 211.0f }), + 212.0f * CalcInvL2Norm({ 32.0f, 212.0f }), + 89.0f * CalcInvL2Norm({ 179.0f, 89.0f }), + 24.0f * CalcInvL2Norm({ 15.0f, 24.0f }), + 138.0f * CalcInvL2Norm({ 227.0f, 138.0f }), + 188.0f * CalcInvL2Norm({ 141.0f, 188.0f }), + 162.0f * CalcInvL2Norm({ 147.0f, 162.0f }), + 12.0f * CalcInvL2Norm({ 199.0f, 12.0f }), + 161.0f * CalcInvL2Norm({ 220.0f, 161.0f }), + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr 
unsigned int inputChannels = 3; + constexpr unsigned int inputBatchSize = 2; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + // Batch 0, Channel 0 + 235.0f, 46.0f, 178.0f, + 100.0f, 123.0f, 19.0f, + 172.0f, 74.0f, 250.0f, + 6.0f, 195.0f, 80.0f, + + // Batch 0, Channel 1 + 113.0f, 95.0f, 202.0f, + 77.0f, 114.0f, 71.0f, + 122.0f, 246.0f, 166.0f, + 82.0f, 28.0f, 37.0f, + + // Batch 0, Channel 2 + 56.0f, 170.0f, 162.0f, + 194.0f, 89.0f, 254.0f, + 12.0f, 209.0f, 200.0f, + 1.0f, 64.0f, 54.0f, + + // Batch 1, Channel 0 + 67.0f, 90.0f, 49.0f, + 7.0f, 163.0f, 18.0f, + 25.0f, 117.0f, 103.0f, + 247.0f, 59.0f, 189.0f, + + // Batch 1, Channel 1 + 239.0f, 104.0f, 199.0f, + 17.0f, 124.0f, 153.0f, + 222.0f, 217.0f, 75.0f, + 32.0f, 126.0f, 21.0f, + + // Batch 1, Channel 2 + 97.0f, 145.0f, 215.0f, + 115.0f, 116.0f, 238.0f, + 226.0f, 16.0f, 132.0f, + 92.0f, 125.0f, 88.0f, + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + + // Batch 0, Channel 0 + 235.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 46.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 178.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 100.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 123.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 19.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 172.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 74.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 6.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 195.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 80.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 0, Channel 1 + 113.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 95.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 202.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 77.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 114.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 71.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 122.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 246.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 82.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 28.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 37.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 0, Channel 2 + 56.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 170.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 162.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 194.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 89.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 254.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 12.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 209.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 1.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 64.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 54.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 1, Channel 0 + 67.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f 
}), + 90.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 49.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 7.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 18.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 25.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 117.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 103.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 247.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 59.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 189.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + + // Batch 1, Channel 1 + 239.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 104.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 199.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 17.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 153.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 222.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 217.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 75.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 32.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 126.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 21.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + + // Batch 1, Channel 2 + 97.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 145.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 215.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 115.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 238.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 226.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 16.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 132.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 92.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 125.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 88.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +template <typename T> +LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputBatchSize = 2; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type 
is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 235.0f, 46.0f, 178.0f, + 100.0f, 123.0f, 19.0f, + 172.0f, 74.0f, 250.0f, + 6.0f, 195.0f, 80.0f, + + // Batch 0, Channel 1 + 113.0f, 95.0f, 202.0f, + 77.0f, 114.0f, 71.0f, + 122.0f, 246.0f, 166.0f, + 82.0f, 28.0f, 37.0f, + + // Batch 0, Channel 2 + 56.0f, 170.0f, 162.0f, + 194.0f, 89.0f, 254.0f, + 12.0f, 209.0f, 200.0f, + 1.0f, 64.0f, 54.0f, + + // Batch 1, Channel 0 + 67.0f, 90.0f, 49.0f, + 7.0f, 163.0f, 18.0f, + 25.0f, 117.0f, 103.0f, + 247.0f, 59.0f, 189.0f, + + // Batch 1, Channel 1 + 239.0f, 104.0f, 199.0f, + 17.0f, 124.0f, 153.0f, + 222.0f, 217.0f, 75.0f, + 32.0f, 126.0f, 21.0f, + + // Batch 1, Channel 2 + 97.0f, 145.0f, 215.0f, + 115.0f, 116.0f, 238.0f, + 226.0f, 16.0f, 132.0f, + 92.0f, 125.0f, 88.0f, + }))); + + LayerTestResult<T, 4> result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo); + AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]); + + armnn::ConstantQueueDescriptor descriptor; + descriptor.m_LayerOutput = &constantTensor; + + armnn::WorkloadInfo info; + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info); + + outputHandle->Allocate(); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl<uint8_t>(workloadFactory, 1.0f, 0); +} + +LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int outputWidth = 5; + unsigned int outputHeight = 6; + unsigned int outputChannels = 3; + + unsigned int inputWidth1 = 2; + unsigned int inputHeight1 = 2; + unsigned int inputChannels1 = 3; + + unsigned int inputWidth2 = 2; + unsigned int inputHeight2 = 4; + unsigned int inputChannels2 = 3; + + unsigned int inputWidth3 = 3; + unsigned int inputHeight3 = 6; + unsigned int inputChannels3 = 2; + + unsigned int inputWidth4 = 3; + unsigned int inputHeight4 = 6; + unsigned int inputChannels4 = 1; + + // Define the tensor descriptors + armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::QuantisedAsymm8); + + // Arbitrary scale and offsets. 
They don't really matter as the merger operator doesn't dequantize/quantize + const float scale = 0.13497836f; + const int32_t offset = -7; + + outputTensorInfo.SetQuantizationScale(scale); + outputTensorInfo.SetQuantizationOffset(offset); + inputTensorInfo1.SetQuantizationScale(scale); + inputTensorInfo1.SetQuantizationOffset(offset); + inputTensorInfo2.SetQuantizationScale(scale); + inputTensorInfo2.SetQuantizationOffset(offset); + inputTensorInfo3.SetQuantizationScale(scale); + inputTensorInfo3.SetQuantizationOffset(offset); + inputTensorInfo4.SetQuantizationScale(scale); + inputTensorInfo4.SetQuantizationOffset(offset); + + LayerTestResult<uint8_t, 3> ret(outputTensorInfo); + + ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>( + { + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, + + 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, + 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, + 86, 87, 88, 89, 90, + }) + ); + + + auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>( + { + 1, 2, + 6, 7, + + 31, 32, + 36, 37, + + 61, 62, + 66, 67, + }) + ); + + auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>( + { + 11, 12, + 16, 17, + 21, 22, + 26, 27, + + 41, 42, + 46, 47, + 51, 52, + 56, 57, + + 71, 72, + 76, 77, + 81, 82, + 86, 87, + }) + ); + + auto input3 = MakeTensor<uint8_t, 3>(inputTensorInfo3, std::vector<uint8_t>( + { + 3, 4, 5, + 8, 9, 10, + 13, 14, 15, + 18, 19, 20, + 23, 24, 25, + 28, 29, 30, + + 33, 34, 35, + 38, 39, 40, + 43, 44, 45, + 48, 49, 50, + 53, 54, 55, + 58, 59, 60, + }) + ); + + + auto input4 = MakeTensor<uint8_t, 3>(inputTensorInfo4, std::vector<uint8_t>( + { + 63, 64, 65, + 68, 69, 70, + 73, 74, 75, + 78, 79, 80, + 83, 84, 85, + 88, 89, 90, + }) + ); + + std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //extent of the window is defined by size of input[0] + armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector<unsigned int> wOrigin2 = { 0, 2, 0 }; //extent of the window is defined by size of input[1] + armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector<unsigned int> wOrigin3 = { 0, 0, 2 }; //extent of the window is defined by size of input[2] + armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector<unsigned int> wOrigin4 = { 2, 0, 2 }; //extent of the window is defined by size of input[3] + armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); + + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo1); + + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo2); + + std::unique_ptr<armnn::ITensorHandle> inputHandle3 = + subTensorsSupported ? 
+ workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo3); + + std::unique_ptr<armnn::ITensorHandle> inputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo4); + + + armnn::MergerQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); + AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + data.m_ViewOrigins.push_back(window3); + data.m_ViewOrigins.push_back(window4); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + inputHandle3->Allocate(); + inputHandle4->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); + CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); + CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 1; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + const float scale = 7.0f; + const int32_t offset = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + const unsigned int shape[] = { batchSize, channels, height, width }; + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo1.SetQuantizationScale(scale); + inputTensorInfo1.SetQuantizationOffset(offset); + + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo2.SetQuantizationScale(scale); + inputTensorInfo2.SetQuantizationOffset(offset); + + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(scale); + outputTensorInfo.SetQuantizationOffset(offset); + + // See dequantized values to the right + auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 63, 35, 77, 70, 56, 112, // 420, 224, 518, 469, 371, 763 + 203, 28, 252, 168, 245, 91 // 1400, 175, 1743, 1155, 1694, 616 + })); + + // See dequantized values to the right + auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 21, 7, 175, 231, 175, 210, // 126, 28, 1204, 1596, 1204, 1449 + 126, 161, 63, 21, 105, 126 // 861, 1106, 420, 126, 714, 861 + })); + + // See dequantized values to the right + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>( + { + 81, 39, 249, 255, 228, 255, // 546, 252, 1722, 2065(clamped), 1575, 2212(clamped) + 255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = 
workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} + +LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 1; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + const unsigned int shape[] = { batchSize, channels, height, width }; + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo1.SetQuantizationScale(4.0f); + inputTensorInfo1.SetQuantizationOffset(1); + + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo2.SetQuantizationScale(3.0f); + inputTensorInfo2.SetQuantizationOffset(-2); + + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1366.255f); // Scale/offset chosen to have output values out of range + outputTensorInfo.SetQuantizationOffset(-5); + + // See dequantized values to the right + auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440, + 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120 + })); + + // See dequantized values to the right + auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747, + 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297 + })); + + // See dequantized values to the right + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>( + { + 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680, + 77, 15, 92, 16, 10, 21, // 112200, 26676, 132192, 29160, 21120, 35640 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = 
workloadFactory.CreateMultiplication(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.5f); + inputTensorInfo.SetQuantizationOffset(-3); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.5f); + outputTensorInfo.SetQuantizationOffset(-3); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.1567f); + inputTensorInfo.SetQuantizationOffset(1); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(0.1567f); + outputTensorInfo.SetQuantizationOffset(1); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 255, + 200, 250 + })); + + // The 'resize bilinear' operation projects the top-left corner of output 
texels into the input image, + // then figures out the interpolants and weights. Note this is different to projecting the centre of the + // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value + // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting + // the centre). + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 1 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(3.141592f); + inputTensorInfo.SetQuantizationOffset(3); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(3.141592f); + outputTensorInfo.SetQuantizationOffset(3); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 1, 3, + 3, 5 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned 
int inputWidth = 3; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 2; + constexpr unsigned int outputHeight = 1; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.5f); + inputTensorInfo.SetQuantizationOffset(-1); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.5f); + outputTensorInfo.SetQuantizationOffset(-1); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 2, 3, // 3.0, 4.5, 6.0 + 5, 8, 13 // 9.0, 13.5, 21.0 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 1, 3 // 3.0, 5.25 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 5; + constexpr unsigned int outputHeight = 3; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.010765f); + inputTensorInfo.SetQuantizationOffset(7); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(0.010132f); + outputTensorInfo.SetQuantizationOffset(-18); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 24, 228, // 0.183005, 2.379065, + 105, 128, // 1.05497, 1.302565 + 230, 71 // 2.400595, 0.68896 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 0, 87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504 + 86, 96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498 + 219, 151, 84, 50, 50 // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = 
workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto ret = BatchNormTestImpl<float>(workloadFactory, 0.f, 0); + return ret; +} + +LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + auto ret = BatchNormTestImpl<uint8_t>(workloadFactory, 1.f/20.f, 50); + return ret; +} + +LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl<uint8_t>(workloadFactory, 2e-6f, 1); +} + +LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation1dTestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1); +} + +LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return 
SimpleMaxPooling2dSize2x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding); +} + +LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<uint8_t>(workloadFactory, forceNoPadding, 3.0f, -5); +} + +LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<float>(workloadFactory, forceNoPadding); +} + +LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<uint8_t>(workloadFactory, forceNoPadding, 0.1f, 128); +} + +LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleAveragePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1); +} + +LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return LargeTensorsAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1); +} + +LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleL2Pooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride1TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride1TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride3TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride4TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride4TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize7TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize7TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize9TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory) 
+{ + return L2Pooling2dSize9TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AsymmetricNonSquarePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AsymmetricNonSquarePooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType) +{ + return ComparePooling2dTestCommon<float>(workloadFactory, refWorkloadFactory, poolingType); +} + +LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType) +{ + return ComparePooling2dTestCommon<uint8_t>(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128); +} + +LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights) +{ + return FullyConnectedLargeTestCommon<float>(workloadFactory, transposeWeights); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleMaxPooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, 1.0f, -5); +} + +LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingMaxPooling2dSize3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingMaxPooling2dSize3TestCommon<uint8_t>(workloadFactory, 1.0f, -5); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( + armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingAveragePooling2dSize3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingAveragePooling2dSize3TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleL2Pooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& 
workloadFactory) +{ + return IgnorePaddingSimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingL2Pooling2dSize3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingL2Pooling2dSize3TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimplePermuteFloat32TestCommon(workloadFactory); +}; + +LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimplePermuteUint8TestCommon(workloadFactory); +}; diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp new file mode 100644 index 0000000000..fc0c9c7b14 --- /dev/null +++ b/src/armnn/backends/test/LayerTests.hpp @@ -0,0 +1,305 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "armnn/ArmNN.hpp" +#include "armnn/Tensor.hpp" + +#include <boost/multi_array.hpp> +#include <boost/assert.hpp> +#include <array> + +// Layer callables + +namespace armnn +{ +class IWorkloadFactory; +} + +template <std::size_t n> +boost::array<unsigned int, n> GetTensorShapeAsArray(const armnn::TensorInfo& tensorInfo) +{ + BOOST_ASSERT_MSG(n == tensorInfo.GetNumDimensions(), + "Attempting to construct a shape array of mismatching size"); + + boost::array<unsigned int, n> shape; + for (unsigned int i = 0; i < n; i++) + { + shape[i] = tensorInfo.GetShape()[i]; + } + return shape; +} + +template <typename T, std::size_t n> +struct LayerTestResult +{ + LayerTestResult(const armnn::TensorInfo& outputInfo) + { + auto shape( GetTensorShapeAsArray<n>(outputInfo) ); + output.resize(shape); + outputExpected.resize(shape); + supported = true; + } + + boost::multi_array<T, n> output; + boost::multi_array<T, n> outputExpected; + bool supported; +}; + +LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<float, 4> +Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory); + + +LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); +LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult<uint8_t, 4> 
SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding ); +LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( + armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> 
ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType); +LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType); + +LayerTestResult<float, 4> ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta); +LayerTestResult<uint8_t, 2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta); + +LayerTestResult<float, 4> SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +template<typename T> +LayerTestResult<T, 4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult<float, 4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod); + +LayerTestResult<float, 2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, float beta); + +LayerTestResult<float, 2> FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled, + bool transposeWeights); + +std::vector<LayerTestResult<float, 3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 3> MergerTest(armnn::IWorkloadFactory& workloadFactory); + 
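All of these declarations follow the same calling convention: a backend-specific test constructs an armnn::IWorkloadFactory, passes it to the layer test, and then compares LayerTestResult::output against LayerTestResult::outputExpected. A minimal sketch of that pattern, assuming the reference backend and the CompareTensors/BOOST_TEST helpers that MemCopyTests.cpp further down already uses (the test-case name and includes here are illustrative, not part of these sources):

    #include <boost/test/unit_test.hpp>
    #include "backends/RefWorkloadFactory.hpp"
    #include "test/TensorHelpers.hpp"
    #include "LayerTests.hpp"

    BOOST_AUTO_TEST_CASE(IllustrativeRefAddition)
    {
        armnn::RefWorkloadFactory factory;                          // CPU reference backend
        LayerTestResult<float, 4> result = AdditionTest(factory);   // builds and runs the workload
        // The layer test fills both the computed output and the expected reference values.
        BOOST_TEST(CompareTensors(result.output, result.outputExpected));
    }
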
+LayerTestResult<float, 4> AdditionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult<float, 4> CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize); + +LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float upperBound, + float lowerBound); + +// Tests that the output should be identical to the input when the output dimensions match the input ones +LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image +LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests resize bilinear for minification of a square input matrix (also: input dimensions are a +// multiple of output dimensions) +LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests resize bilinear for minification (output dimensions smaller than input dimensions) +LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests resize bilinear for magnification (output dimensions bigger than input dimensions) +LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, float upperBound); +LayerTestResult<uint8_t, 4> 
BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, + float upperBound, + float lowerBound); + +LayerTestResult<uint8_t, 2> FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +std::vector<LayerTestResult<uint8_t, 3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f); + +LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta); + +LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> 
Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); + + +LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights); +LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory); + diff --git a/src/armnn/backends/test/MemCopyTests.cpp b/src/armnn/backends/test/MemCopyTests.cpp new file mode 100644 index 0000000000..8e4dae35f2 --- /dev/null +++ b/src/armnn/backends/test/MemCopyTests.cpp @@ -0,0 +1,156 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> +#include <boost/multi_array.hpp> + +#include "armnn/ArmNN.hpp" +#include "backends/RefWorkloadFactory.hpp" +#if ARMCOMPUTECL_ENABLED +#include "backends/ClWorkloadFactory.hpp" +#endif +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonWorkloadFactory.hpp" +#endif +#include "backends/CpuTensorHandle.hpp" +#include "test/TensorHelpers.hpp" + +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) + +void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, + bool withSubtensors) +{ + const std::array<unsigned int, 4> shapeData = { 1u, 1u, 6u, 5u }; + const armnn::TensorShape tensorShape(4, shapeData.data()); + const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); + boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + }) + ); + + boost::multi_array<float, 4> outputData(shapeData); + + auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); + auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); + + AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); + outputTensorHandle->Allocate(); + + armnn::MemCopyQueueDescriptor memCopyQueueDesc; + armnn::WorkloadInfo workloadInfo; + + const unsigned int origin[4] = {}; + + auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) + ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) + : std::move(inputTensorHandle); + auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) + ? 
dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) + : std::move(outputTensorHandle); + + AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); + AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); + + dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); + + CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); + + BOOST_TEST(CompareTensors(inputData, outputData)); +} + +template <typename SrcWorkloadFactory, typename DstWorkloadFactory> +void MemCopyTest(bool withSubtensors) +{ + SrcWorkloadFactory srcWorkloadFactory; + DstWorkloadFactory dstWorkloadFactory; + MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); +} + +#if ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(true); +} + +#endif // ARMCOMPUTECL_ENABLED + +#if ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(true); +} + +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(true); +} + +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/NormTestImpl.hpp b/src/armnn/backends/test/NormTestImpl.hpp new file mode 100644 index 0000000000..1f6aadc9df --- /dev/null +++ b/src/armnn/backends/test/NormTestImpl.hpp @@ -0,0 +1,238 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "armnn/Exceptions.hpp" +#include "armnn/LayerSupport.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +LayerTestResult<float,4> SimpleNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + const unsigned int inputHeight = 2; + const unsigned int inputWidth = 2; + const unsigned int inputChannels = 1; + const unsigned int inputNum = 2; + + unsigned int outputHeight = inputHeight; + unsigned int outputWidth = inputWidth; + unsigned int outputChannels = inputChannels; + unsigned int outputNum = inputNum; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + + auto inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + auto outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + LayerTestResult<float,4> ret(outputTensorInfo); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + // Batch #0 + 1.0f, 2.0f, + 3.0f, 4.0f, + // Batch #1 + 5.0f, 6.0f, + 7.0f, 8.0f + })); + + float alpha = 1.f; + float beta = 1.f; + float kappa = 1.f; + uint32_t normSize = 3; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::NormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_NormChannelType = normChannel; + data.m_Parameters.m_NormMethodType = normMethod; + data.m_Parameters.m_NormSize = normSize; + data.m_Parameters.m_Alpha = alpha; + data.m_Parameters.m_Beta = beta; + data.m_Parameters.m_K = kappa; + + armnn::PassthroughCpuTensorHandle refHandle(outputTensorInfo, &ret.outputExpected[0][0][0][0]); + armnn::NormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, &refHandle); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + switch (normMethod) + { + case armnn::NormalizationAlgorithmMethod::LocalBrightness: + { + switch (normChannel) + { + case armnn::NormalizationAlgorithmChannel::Within: + { + // When normalising within channels, the 3x3 kernel covers the entire 2x2 input at every index. 
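+ // (normSize is 3 here, so a 3x3 window centred on any element spans all four values of its 2x2 channel.)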
+ // Therefore, all output values should equal the inputs, but divided by: + // pow((kappa + (accumulatedScale * alpha)), beta) + // ...where accumulatedScale is the sum of every element squared + float divisor[inputNum]; + for(int i = 0; i < boost::numeric_cast<int>(inputNum); i++) + { + float accumulatedScale = input[i][0][0][0]*input[i][0][0][0] + + input[i][0][0][1]*input[i][0][0][1] + + input[i][0][1][0]*input[i][0][1][0] + + input[i][0][1][1]*input[i][0][1][1]; + divisor[i] = powf((kappa + accumulatedScale * alpha), beta); + } + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, + std::vector<float>({input[0][0][0][0]/divisor[0], + input[0][0][0][1]/divisor[0], + input[0][0][1][0]/divisor[0], + input[0][0][1][1]/divisor[0], + input[1][0][0][0]/divisor[1], + input[1][0][0][1]/divisor[1], + input[1][0][1][0]/divisor[1], + input[1][0][1][1]/divisor[1]})); + break; + } + case armnn::NormalizationAlgorithmChannel::Across: + { + // When normalising across channels, all output values should equal the inputs, but multiplied by: + // pow((kappa + (accumulatedScale * alpha)), -beta) + // ...where accumulatedScale is the sum of the inputs for adjacent channels for this element squared + // ...where adjacent channels means within half the normSize for the channel + // The test data has only one channel, so this is simplified below. + std::vector<float> outputVector; + for (int n = 0; n < boost::numeric_cast<int>(inputNum); ++n) + { + for (int h = 0; h < boost::numeric_cast<int>(inputHeight); ++h) + { + for (int w = 0; w < boost::numeric_cast<int>(inputWidth); ++w) + { + float accumulatedScale = input[n][0][h][w]*input[n][0][h][w]; + float scale = powf((kappa + accumulatedScale * alpha), -beta); + outputVector.push_back(input[n][0][h][w] * scale); + } + } + } + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputVector); + break; + } + default: + { + throw armnn::UnimplementedException("Unsupported normalisation channel type, " + "only Across and Within are supported"); + } + } + break; + } + case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough + default: + { + throw armnn::UnimplementedException("Unsupported normalisation method type, " + "only LocalBrightness is supported"); + } + } + + return ret; +} + +LayerTestResult<float,4> CompareNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + constexpr unsigned int inputNum = 5; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputWidth = inputWidth; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + LayerTestResult<float,4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 111234); + + constexpr float alpha = 1.f; + constexpr float beta = 1.f; + constexpr float kappa = 1.f; + 
constexpr uint32_t normSize = 5; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::NormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_NormChannelType = normChannel; + data.m_Parameters.m_NormMethodType = normMethod; + data.m_Parameters.m_NormSize = normSize; + data.m_Parameters.m_Alpha = alpha; + data.m_Parameters.m_Beta = beta; + data.m_Parameters.m_K = kappa; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::NormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + // Don't execute if Normalization is not supported for the method and channel types, as an exception will be raised. + armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + ret.supported = armnn::IsNormalizationSupported(compute, inputTensorInfo, outputTensorInfo, data.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!ret.supported) + { + return ret; + } + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateNormalization(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + diff --git a/src/armnn/backends/test/PermuteTestImpl.hpp b/src/armnn/backends/test/PermuteTestImpl.hpp new file mode 100644 index 0000000000..4eafa1a211 --- /dev/null +++ b/src/armnn/backends/test/PermuteTestImpl.hpp @@ -0,0 +1,121 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +template<typename T> +LayerTestResult<T, 4> SimplePermuteTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::PermuteDescriptor descriptor, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + const std::vector<T>& inputData, + const std::vector<T>& outputExpectedData) +{ + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::PermuteQueueDescriptor data; + data.m_Parameters = descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float, 4> SimplePermuteFloat32TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector<float> input = std::vector<float>( + { + 1.0f, 2.0f, + 3.0f, 4.0f, + + 5.0f, 6.0f, + 7.0f, 8.0f + }); + + std::vector<float> outputExpected = std::vector<float>( + { + 1.0f, 5.0f, 2.0f, 6.0f, + 3.0f, 7.0f, 4.0f, 8.0f + }); + + return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} + +LayerTestResult<uint8_t, 4> SimplePermuteUint8TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.0f); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.0f); + + std::vector<uint8_t> input = std::vector<uint8_t>( + { + 1, 2, + 3, 4, + + 5, 6, + 7, 8 + }); + + std::vector<uint8_t> outputExpected = std::vector<uint8_t>( + { + 1, 5, 2, 6, + 3, 7, 4, 8 + }); + + return SimplePermuteTestImpl<uint8_t>(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp 
b/src/armnn/backends/test/Pooling2dTestImpl.hpp new file mode 100644 index 0000000000..fc84ddb2ca --- /dev/null +++ b/src/armnn/backends/test/Pooling2dTestImpl.hpp @@ -0,0 +1,1039 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include <algorithm> + +template<typename T> +LayerTestResult<T, 4> SimplePooling2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::Pooling2dDescriptor descriptor, + float qScale, + int32_t qOffset, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& outputExpected) +{ + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputBatchSize = boost::numeric_cast<unsigned int>(input.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputBatchSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 4> result(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Pooling2dQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + // Don't execute if Pooling is not supported, as an exception will be raised. 
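+ // The IsPooling2dSupported query below fills result.supported; when the backend reports this
+ // configuration as unsupported, the function returns early instead of creating and executing the workload.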
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + result.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, + queueDescriptor.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!result.supported) + { + return result; + } + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + result.outputExpected = outputExpected; + + return result; +} + +// +// Tests max pooling with the following parameters: +// +// Pooling size: 3x3 +// Stride: (2,4) +// input size: 8x13 +// channels: 2 +// batch size: 2 +// +template<typename T> +LayerTestResult<T, 4> SimpleMaxPooling2dSize3x3Stride2x4TestCommon(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 4; + // forceNoPadding is mainly used for compatibility with ARM Compute. + // As of 16/05/2017, it errors if padX or padY are equal to or greater than the pool size. + descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; + descriptor.m_PadTop = descriptor.m_PadBottom = 0; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + unsigned int inputWidth = 8; + unsigned int inputHeight = 13; + unsigned int outputWidth = + (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / + descriptor.m_StrideX; + unsigned int outputHeight = + (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / + descriptor.m_StrideY; + unsigned int channels = 2; + unsigned int batchSize = 2; + + armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + std::vector<float> singleChannelData({ + 0.0f, 4.0f, 8.0f, 1.0f, 6.0f, 4.0f, 5.0f, 8.0f, + 1.0f, 1.0f, 6.0f, 0.0f, 3.0f, 7.0f, 4.0f, 7.0f, + 8.0f, 5.0f, 0.0f, 0.0f, 8.0f, 3.0f, 4.0f, 3.0f, + 8.0f, 2.0f, 5.0f, 4.0f, 1.0f, 9.0f, 2.0f, 0.0f, + 5.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 7.0f, 2.0f, + 1.0f, 2.0f, 6.0f, 2.0f, 7.0f, 9.0f, 5.0f, 2.0f, + 9.0f, 7.0f, 3.0f, 1.0f, 3.0f, 4.0f, 8.0f, 3.0f, + 1.0f, 0.0f, 0.0f, 5.0f, 5.0f, 4.0f, 2.0f, 0.0f, + 6.0f, 4.0f, 3.0f, 6.0f, 9.0f, 5.0f, 5.0f, 6.0f, + 8.0f, 7.0f, 9.0f, 6.0f, 1.0f, 4.0f, 1.0f, 9.0f, + 7.0f, 1.0f, 9.0f, 2.0f, 9.0f, 9.0f, 8.0f, 1.0f, + 4.0f, 4.0f, 5.0f, 9.0f, 2.0f, 6.0f, 6.0f, 4.0f, + 3.0f, 5.0f, 4.0f, 0.0f, 1.0f, 5.0f, 9.0f, 7.0f, + }); + + // Construct input data + std::vector<float> inputData; + auto negator = [](float f) { return -f; }; + + // First image (two channels where the second channel is the negative of the first one) + inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); + std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); + + // Second image (same as first image) + inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); + std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); + + auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); + + // these were calculated manually + auto shape(GetTensorShapeAsArray<4>(outputTensorInfo)); + boost::multi_array<T, 4> outputExpected(shape); + if (forceNoPadding) + { + outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 8.0f, 8.0f, 8.0f, + 9.0f, 7.0f, 9.0f, + 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, -3.0f, + -1.0f, 0.0f, 0.0f, + -1.0f, -1.0f, -1.0f, + + 8.0f, 8.0f, 8.0f, + 9.0f, 7.0f, 9.0f, + 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, -3.0f, + -1.0f, 0.0f, 0.0f, + -1.0f, -1.0f, -1.0f + })); + } + else + { + outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, + 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, + 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, + 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f, + + 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, + 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, + 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, + 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f + })); + } + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> SimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo 
outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.5f, 4.0f, + 1.0f, 2.5f, 4.0f, + 1.0f, 2.5f, 4.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> LargeTensorsAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 100; + descriptor.m_StrideX = descriptor.m_StrideY = 5; + descriptor.m_PadLeft = 50; + descriptor.m_PadRight = 50; + descriptor.m_PadTop = 50; + descriptor.m_PadBottom = 50; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 5, 3, 52, 60 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 5, 3, 11, 13 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + std::vector<T> inputVec; + + for (unsigned int i = 0 ; i < inputTensorInfo.GetShape().GetNumElements(); ++i) + { + inputVec.push_back(1); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, inputVec); + + std::vector<T> outputVec; + + for (unsigned int i = 0 ; i < outputTensorInfo.GetShape().GetNumElements(); ++i) + { + outputVec.push_back(1); + } + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputVec); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> SimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 5.0f, 5.0f, + 5.0f, 5.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> 
L2Pooling2dSize3Stride1TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, + 1.0f, 2.0f, 2.0f, 1.0f, + 5.0f, 4.0f, 1.0f, 5.0f, + 2.0f, 1.0f, 5.0f, 2.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 3.0f, + 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize3Stride3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 3; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 3.0f, 3.0f, + 3.0f, 3.0f, 3.0f, + 3.0f, 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize3Stride4TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 4; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + 
QuantizedVector<T>(qScale, qOffset, { + 3.0f, 3.0f, + 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize7TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 7; + descriptor.m_StrideX = descriptor.m_StrideY = 7; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 7.0f, 0.0f, + 8.0f, 0.0f, 9.0f, 0.0f, 10.0f, 0.0f, 5.0f, + 0.0f, 5.0f, 0.0f, 2.0f, 0.0f, 1.0f, 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize9TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 9; + descriptor.m_StrideX = descriptor.m_StrideY = 9; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> AsymmetricNonSquarePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = 2; + descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 2; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 2; + descriptor.m_OutputShapeRounding = 
armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + // Construct input data + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 3.0f, 4.0f, + })); + + // these were calculated manually + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 0.0f, 3.0f, 0.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> ComparePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + const unsigned int inputWidth = 16; + const unsigned int inputHeight = 32; + const unsigned int channelCount = 2; + const unsigned int batchSize = 5; + + const unsigned int poolSize = 3; + const unsigned int strideX = 2; + const unsigned int strideY = 4; + const unsigned int padX = 0; + const unsigned int padY = 0; + + const unsigned int outputWidth = (inputWidth + 2 * padX + strideX - poolSize) / strideX; + const unsigned int outputHeight = (inputHeight + 2 * padY + strideY - poolSize) / strideY; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { batchSize, channelCount, inputHeight, inputWidth }; + unsigned int outputShape[] = { batchSize, channelCount, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 81715); + + LayerTestResult<T, 4> comparisonResult(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Pooling2dQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_PoolType = poolingType; + data.m_Parameters.m_PoolWidth = poolSize; + data.m_Parameters.m_PoolHeight = poolSize; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + // Don't execute if Pooling is not supported, as an exception will be raised. 
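+ // Only the workload factory under test is queried here; the reference factory is not checked, and
+ // comparisonResult.supported is returned straight away when the backend cannot handle this configuration.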
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + comparisonResult.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, + data.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!comparisonResult.supported) + { + return comparisonResult; + } + + armnn::Pooling2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreatePooling2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); + + return comparisonResult; +} + +// +// Tests max pooling with the following parameters: +// +// Pooling size: 2x2 +// Stride: (2,2) +// input size: 4x4 +// channels: 1 +// batch size: 1 +// +template<typename T> +LayerTestResult<T, 4> SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 2; + descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; + descriptor.m_PadTop = descriptor.m_PadBottom = 0; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + unsigned int inputWidth = 4; + unsigned int inputHeight = 4; + unsigned int outputWidth = + (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / + descriptor.m_StrideX; + unsigned int outputHeight = + (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / + descriptor.m_StrideY; + unsigned int channels = 1; + unsigned int batchSize = 1; + + std::vector<float> inputData = { + 510.0f, 222.0f, 780.0f, 654.0f, + 141.0f, 276.0f, 15.0f, 546.0f, + 303.0f, 618.0f, 582.0f, 339.0f, + 438.0f, 564.0f, 573.0f, 402.0f + }; + + // Note that left and right edges will be 0.f, due to the 2x2 max pooling only accessing zeros here + std::vector<float> expectedOutputDataWithPadding = { + 0.0f, 510.0f, 780.0f, 654.0f, 0.0f, + 0.0f, 438.0f, 618.0f, 402.0f, 0.0f + }; + + std::vector<float> expectedOutputDataNoPadding = { + 510.0f, 780.0f, + 618.0f, 582.0f + }; + + armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + + // Scale and offset should match input - we're just calculating maximum values. 
+ armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + forceNoPadding ? QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) : + QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding)); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, -2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, 3.0f, 4.0f, + 1.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -4.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingMaxPooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, -2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 2.0f, 2.0f, -3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 13.0f, 10.0f, + 6.0f, 26.0f, 20.0f, + 3.0f, 13.0f, 10.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 0; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 0; + descriptor.m_PadBottom = 0; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Ceiling; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 3.5f, + 2.0f, 3.5f + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 9.0f, 27.0f, 18.0f, 36.0f, + 18.0f, 9.0f, 18.0f, 9.0f, + 27.0f, 18.0f, 9.0f, 27.0f, + 9.0f, 27.0f, 9.0f, 18.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 7.0f, 11.0f, 13.0f, 9.0f, + 12.0f, 17.0f, 19.0f, 13.0f, + 12.0f, 16.0f, 16.0f, 10.0f, + 9.0f, 11.0f, 12.0f, 7.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 4.0f, 8.0f, 16.0f, + 4.0f, 2.0f, 2.0f, 4.0f, + 8.0f, 2.0f, 4.0f, 2.0f, + 16.0f, 2.0f, 2.0f, 8.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 4.4721f, 8.0f, + 4.4721f, 2.6457f, 2.236f, + 8.0f, 1.4142f, 4.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingL2Pooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0540f, 1.7638f, 2.5385f, 2.3570f, + 1.2909f, 2.1602f, 3.1091f, 2.8867f, + 1.2909f, 2.1602f, 3.1091f, 2.8867f, + 1.0540f, 1.7638f, 2.5385f, 2.3570f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} diff --git a/src/armnn/backends/test/QuantizeHelper.hpp b/src/armnn/backends/test/QuantizeHelper.hpp new file mode 100644 index 0000000000..bfaf9342f0 --- /dev/null +++ b/src/armnn/backends/test/QuantizeHelper.hpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/TypesUtils.hpp> + +#include <initializer_list> +#include <iterator> +#include <vector> +#include <boost/core/ignore_unused.hpp> + +template<typename T, bool DoQuantize=true> +struct SelectiveQuantizer +{ + static T Quantize(float value, float scale, int32_t offset) + { + return armnn::Quantize<T>(value, scale, offset); + } + + static float Dequantize(T value, float scale, int32_t offset) + { + return armnn::Dequantize(value, scale, offset); + } +}; + +template<typename T> +struct SelectiveQuantizer<T, false> +{ + static T Quantize(float value, float scale, int32_t offset) + { + boost::ignore_unused(scale, offset); + return value; + } + + static float Dequantize(T value, float scale, int32_t offset) + { + boost::ignore_unused(scale, offset); + return value; + } +}; + +template<typename T> +T SelectiveQuantize(float value, float scale, int32_t offset) +{ + return SelectiveQuantizer<T, armnn::IsQuantizedType<T>()>::Quantize(value, scale, offset); +}; + +template<typename T> +float SelectiveDequantize(T value, float scale, int32_t offset) +{ + return SelectiveQuantizer<T, armnn::IsQuantizedType<T>()>::Dequantize(value, scale, offset); +}; + +template<typename ItType> +struct IsFloatingPointIterator +{ + static constexpr bool value=std::is_floating_point<typename std::iterator_traits<ItType>::value_type>::value; +}; + +template <typename T, typename FloatIt, +typename std::enable_if<IsFloatingPointIterator<FloatIt>::value, int>::type=0 // Make sure valid fp iterator +> +std::vector<T> QuantizedVector(float qScale, int32_t qOffset, FloatIt first, FloatIt last) +{ + std::vector<T> quantized; + quantized.reserve(boost::numeric_cast<size_t>(std::distance(first, last))); + + for (auto it = first; it != last; ++it) + { + auto f = *it; + T q =SelectiveQuantize<T>(f, qScale, qOffset); + quantized.push_back(q); + } + + return quantized; +} + +template<typename T> +std::vector<T> QuantizedVector(float qScale, int32_t qOffset, const std::vector<float>& array) +{ + return QuantizedVector<T>(qScale, qOffset, array.begin(), array.end()); +} + +template<typename T> +std::vector<T> QuantizedVector(float qScale, int32_t qOffset, std::initializer_list<float> array) +{ + return QuantizedVector<T>(qScale, qOffset, array.begin(), array.end()); +} diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp new file mode 100644 index 0000000000..87d82f1781 --- /dev/null +++ b/src/armnn/backends/test/Reference.cpp @@ -0,0 +1,231 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include <boost/test/unit_test.hpp> + +#include "LayerTests.hpp" +#include "test/TensorHelpers.hpp" + +#include "backends/RefWorkloadFactory.hpp" + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_Reference) +using FactoryType = armnn::RefWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5Uint8, SimpleConvolution2d3x5Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolutionUint8, SimpleConvolution2d3x5Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution1dUint8, Convolution1dUint8Test, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPaddingLargerThanHalfKernelSize, + Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, false) + +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2Uint8, SimpleMaxPooling2dSize2x2Stride2x2Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, false) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) 
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
+ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
+ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest)
+ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2d, AsymmetricNonSquarePooling2dTest)
+ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2dUint8, AsymmetricNonSquarePooling2dUint8Test)
+
+// Activation
+ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest)
+ARMNN_AUTO_TEST_CASE(ConstantLinearActivationUint8, ConstantLinearActivationUint8Test)
+
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest)
+
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f)
+
+ARMNN_AUTO_TEST_CASE(SimpleSigmoid, SimpleSigmoidTest)
+ARMNN_AUTO_TEST_CASE(SimpleSigmoidUint8, SimpleSigmoidUint8Test)
+
+ARMNN_AUTO_TEST_CASE(ReLu1, BoundedReLuUpperAndLowerBoundTest)
+ARMNN_AUTO_TEST_CASE(ReLu6, BoundedReLuUpperBoundOnlyTest)
+ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest)
+ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest)
+
+// Fully Connected
+ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false)
+ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true)
+ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true)
+
+ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true)
+
+// Splitter
+BOOST_AUTO_TEST_CASE(SimpleSplitter)
+{
+    armnn::RefWorkloadFactory workloadFactory;
+    auto testResult = SplitterTest(workloadFactory);
+    for (unsigned int i = 0; i < testResult.size(); ++i)
+    {
+        BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected));
+    }
+}
+
+BOOST_AUTO_TEST_CASE(SplitterUint8)
+{
+    armnn::RefWorkloadFactory workloadFactory;
+    auto testResult = SplitterUint8Test(workloadFactory);
+    for (unsigned int i = 0; i < testResult.size(); ++i)
+    {
+        BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected));
+    }
+}
+
+ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest)
+ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test)
+
+// Merger
+ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest)
+ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test)
+
+// Add
+ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
+ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest)
+
+ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test)
+ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test)
+ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test)
+
+// Mul
+ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
+
+// Batch Norm
+ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
+ARMNN_AUTO_TEST_CASE(BatchNormUint8, BatchNormUint8Test)
+
+// Resize Bilinear
+ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest)
+ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearUint8, SimpleResizeBilinearUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearNopUint8, ResizeBilinearNopUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinUint8, ResizeBilinearSqMinUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMinUint8, ResizeBilinearMinUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMagUint8, ResizeBilinearMagUint8Test)
+
+// Fake Quantization
+ARMNN_AUTO_TEST_CASE(FakeQuantization, FakeQuantizationTest)
+
+// L2 Normalization
+ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest)
+ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest)
+ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest)
+ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest)
+
+// Constant
+ARMNN_AUTO_TEST_CASE(Constant, ConstantTest)
+ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8Test)
+
+// Concat
+ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest)
+ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test)
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest)
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test)
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test)
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test)
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test)
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims,
Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/ReshapeTestImpl.hpp b/src/armnn/backends/test/ReshapeTestImpl.hpp new file mode 100644 index 0000000000..1a31aa3bce --- /dev/null +++ b/src/armnn/backends/test/ReshapeTestImpl.hpp @@ -0,0 +1,177 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +template<typename T> +LayerTestResult<T, 4> SimpleReshapeTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + const std::vector<T>& inputData, + const std::vector<T>& outputExpectedData) +{ + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ReshapeQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReshape(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float, 4> SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 2, 3, 3 }; + unsigned int outputShape[] = { 2, 2, 9, 1 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector<float> input = std::vector<float>( + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, + + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f, + + 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, + + 27.0f, 28.0f, 29.0f, + 30.0f, 31.0f, 
32.0f, + 33.0f, 34.0f, 35.0f, + }); + + std::vector<float> outputExpected = std::vector<float>( + { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, + + 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + }); + + return SimpleReshapeTestImpl<float>(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); +} + +LayerTestResult<float, 4> SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory) +{ + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo(inputTensorInfo); + + auto input = MakeTensor<float, 4>(inputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + LayerTestResult<float, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, + { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f }); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::FloorQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFloor(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<uint8_t, 4> SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 2, 3, 3 }; + unsigned int outputShape[] = { 2, 2, 9, 1 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.0f); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.0f); + + std::vector<uint8_t> input = std::vector<uint8_t>( + { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + + 9, 10, 11, + 12, 13, 14, + 15, 16, 17, + + 18, 19, 20, + 21, 22, 23, + 24, 25, 26, + + 27, 28, 29, + 30, 31, 32, + 33, 34, 35, + }); + + std::vector<uint8_t> outputExpected = std::vector<uint8_t>( + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, + + 9, 10, 11, 12, 13, 14, 15, 16, 17, + + 18, 19, 20, 21, 22, 23, 24, 25, 26, + + 27, 28, 29, 30, 31, 32, 33, 34, 35, + }); + + return SimpleReshapeTestImpl<uint8_t>(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); +} diff --git a/src/armnn/backends/test/SoftmaxTestImpl.hpp b/src/armnn/backends/test/SoftmaxTestImpl.hpp new file mode 100644 index 0000000000..5aa74f9618 --- /dev/null +++ b/src/armnn/backends/test/SoftmaxTestImpl.hpp @@ -0,0 +1,150 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include <algorithm> + +template<typename T> +LayerTestResult<T, 2> SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + using std::exp; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 4 }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + float qScale = 1.f / 256.f; + int qOffset = 0; + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + + outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + LayerTestResult<T, 2> ret(outputTensorInfo); + + // Each row is independently softmax'd + auto input = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, 0, { + 0.f, 1.f, 0.f, 0.f, + .5f, 0.f, 0.f, 0.f, + }))); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SoftmaxQueueDescriptor data; + data.m_Parameters.m_Beta = beta; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + float x0[4] = { exp((0.f - 1.0f) * beta), exp((1.0f - 1.0f) * beta), + exp((0.0f - 1.0f) * beta), exp((0.0f - 1.0f) * beta) }; + float sum0 = x0[0] + x0[1] + x0[2] + x0[3]; + float x1[4] = { exp((0.5f - 0.5f) * beta), exp((0.0f - 0.5f) * beta), + exp((0.0f - 0.5f) * beta), exp((0.0f - 0.5f) * beta) }; + float sum1 = x1[0] + x1[1] + x1[2] + x1[3]; + + ret.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + x0[0] / sum0, x0[1] / sum0, x0[2] / sum0, x0[3] / sum0, + x1[0] / sum1, x1[1] / sum1, x1[2] / sum1, x1[3] / sum1 + }))); + + return ret; +} + +template<typename T> +LayerTestResult<T, 2> CompareSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + + const int batchSize = 20; + const int channels = 30; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { batchSize, channels }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + float qScale = 1.f / 256.f; + int qOffset = 0; + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + + LayerTestResult<T, 2> ret(outputTensorInfo); + auto input = MakeRandomTensor<T, 2>(inputTensorInfo, 0xF00D, 0.0f, 1.0f); + + 
std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SoftmaxQueueDescriptor data; + data.m_Parameters.m_Beta = beta; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + + armnn::SoftmaxQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateSoftmax(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get()); + + return ret; +}
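The expected values in SimpleSoftmaxTestImpl above are worked out by hand with the max-subtracted, beta-scaled form of softmax (the x0[]/sum0 and x1[]/sum1 arrays). As a reading aid only, not part of the patch, here is a minimal standalone sketch of that reference computation; the ReferenceSoftmax name is ours:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Numerically stable softmax with a beta factor: subtract the row maximum
// before exponentiating, which is exactly how the expected x0[]/x1[] values
// in SimpleSoftmaxTestImpl are derived.
std::vector<float> ReferenceSoftmax(const std::vector<float>& row, float beta)
{
    const float maxVal = *std::max_element(row.begin(), row.end());

    std::vector<float> out(row.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < row.size(); ++i)
    {
        out[i] = std::exp((row[i] - maxVal) * beta);
        sum += out[i];
    }
    for (float& v : out)
    {
        v /= sum;
    }
    return out;
}

For the first test row {0, 1, 0, 0} with beta = 1 this reproduces the exp(-1) / (3 * exp(-1) + 1) pattern used for the non-maximum entries of the expected output.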
\ No newline at end of file diff --git a/src/armnn/backends/test/SplitterTestImpl.hpp b/src/armnn/backends/test/SplitterTestImpl.hpp new file mode 100644 index 0000000000..b72046e4bc --- /dev/null +++ b/src/armnn/backends/test/SplitterTestImpl.hpp @@ -0,0 +1,328 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "backends/test/QuantizeHelper.hpp" + + +template<typename T> +std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputWidth = 5; + unsigned int inputHeight = 6; + unsigned int inputChannels = 3; + + unsigned int outputWidth1 = 2; + unsigned int outputHeight1 = 2; + unsigned int outputChannels1 = 3; + + unsigned int outputWidth2 = 2; + unsigned int outputHeight2 = 4; + unsigned int outputChannels2 = 3; + + unsigned int outputWidth3 = 3; + unsigned int outputHeight3 = 6; + unsigned int outputChannels3 = 2; + + unsigned int outputWidth4 = 3; + unsigned int outputHeight4 = 6; + unsigned int outputChannels4 = 1; + + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo3({ outputChannels3, outputHeight3, outputWidth3 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo4({ outputChannels4, outputHeight4, outputWidth4 }, armnn::GetDataType<T>()); + // note that output 5 should match output 2 + armnn::TensorInfo outputTensorInfo5({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo1.SetQuantizationScale(qScale); + outputTensorInfo1.SetQuantizationOffset(qOffset); + outputTensorInfo2.SetQuantizationScale(qScale); + outputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo3.SetQuantizationScale(qScale); + outputTensorInfo3.SetQuantizationOffset(qOffset); + outputTensorInfo4.SetQuantizationScale(qScale); + outputTensorInfo4.SetQuantizationOffset(qOffset); + outputTensorInfo5.SetQuantizationScale(qScale); + outputTensorInfo5.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T,3> ret1(outputTensorInfo1); + LayerTestResult<T,3> ret2(outputTensorInfo2); + LayerTestResult<T,3> ret3(outputTensorInfo3); + LayerTestResult<T,3> ret4(outputTensorInfo4); + LayerTestResult<T,3> ret5(outputTensorInfo5); + + auto input = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + }) + )); + + + ret1.outputExpected = MakeTensor<T, 3>(outputTensorInfo1, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, + 6.0f, 7.0f, + + 31.0f, 32.0f, + 36.0f, 37.0f, + + 61.0f, 62.0f, + 66.0f, 67.0f, + }) + )); + + ret2.outputExpected = MakeTensor<T, 3>(outputTensorInfo2, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11.0f, 12.0f, + 16.0f, 17.0f, + 21.0f, 22.0f, + 26.0f, 27.0f, + + 41.0f, 42.0f, + 46.0f, 47.0f, + 51.0f, 52.0f, + 56.0f, 57.0f, + + 71.0f, 72.0f, + 76.0f, 77.0f, + 81.0f, 82.0f, + 86.0f, 87.0f, + }) + )); + + ret3.outputExpected = MakeTensor<T, 3>(outputTensorInfo3, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 4.0f, 5.0f, + 8.0f, 9.0f, 10.0f, + 13.0f, 14.0f, 15.0f, + 18.0f, 19.0f, 20.0f, + 23.0f, 24.0f, 25.0f, + 28.0f, 29.0f, 30.0f, + + 33.0f, 34.0f, 35.0f, + 38.0f, 39.0f, 40.0f, + 43.0f, 44.0f, 45.0f, + 48.0f, 49.0f, 50.0f, + 53.0f, 54.0f, 55.0f, + 58.0f, 59.0f, 60.0f, + }) + )); + + ret4.outputExpected = MakeTensor<T, 3>(outputTensorInfo4, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 63.0f, 64.0f, 65.0f, + 68.0f, 69.0f, 70.0f, + 73.0f, 74.0f, 75.0f, + 78.0f, 79.0f, 80.0f, + 83.0f, 84.0f, 85.0f, + 88.0f, 89.0f, 90.0f, + }) + )); + + + ret5.outputExpected = MakeTensor<T, 3>(outputTensorInfo5, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11.0f, 12.0f, + 16.0f, 17.0f, + 21.0f, 22.0f, + 26.0f, 27.0f, + + 41.0f, 42.0f, + 46.0f, 47.0f, + 51.0f, 52.0f, + 56.0f, 57.0f, + + 71.0f, 72.0f, + 76.0f, 77.0f, + 81.0f, 82.0f, + 86.0f, 87.0f, + }) + )); + + std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of output[0] + armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the 
window is defined by size of output[1] + armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of output[2] + armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of output[3] + armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> outputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo1); + + std::unique_ptr<armnn::ITensorHandle> outputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo2); + + std::unique_ptr<armnn::ITensorHandle> outputHandle3 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo3); + + std::unique_ptr<armnn::ITensorHandle> outputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo4); + + std::unique_ptr<armnn::ITensorHandle> outputHandle5 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo5.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo5); + + armnn::SplitterQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo3, outputHandle3.get()); + AddOutputToWorkload(data, info, outputTensorInfo4, outputHandle4.get()); + AddOutputToWorkload(data, info, outputTensorInfo5, outputHandle5.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + data.m_ViewOrigins.push_back(window3); + data.m_ViewOrigins.push_back(window4); + //add window2 again (to have an overlapping split) + data.m_ViewOrigins.push_back(window2); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); + + inputHandle->Allocate(); + outputHandle1->Allocate(); + outputHandle2->Allocate(); + outputHandle3->Allocate(); + outputHandle4->Allocate(); + outputHandle5->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get()); + CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get()); + CopyDataFromITensorHandle(&ret3.output[0][0][0], outputHandle3.get()); + CopyDataFromITensorHandle(&ret4.output[0][0][0], outputHandle4.get()); + CopyDataFromITensorHandle(&ret5.output[0][0][0], outputHandle5.get()); + + std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4, ret5}; + + return ret; +} + + +template <typename T> +LayerTestResult<T, 3> CopyViaSplitterTestImpl(armnn::IWorkloadFactory& workloadFactory, float 
qScale, int32_t qOffset) +{ + const armnn::TensorInfo tensorInfo({ 3, 6, 5 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 3>(tensorInfo, QuantizedVector<T>(qScale, qOffset, + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + })); + + std::vector<unsigned int> origin = { 0, 0, 0 }; + armnn::SplitterQueueDescriptor::ViewOrigin window(origin); + + const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + std::unique_ptr<armnn::ITensorHandle> outputHandle = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, tensorInfo.GetShape(), origin.data()) : + workloadFactory.CreateTensorHandle(tensorInfo); + + armnn::SplitterQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); + + workload->Execute(); + + LayerTestResult<T, 3> ret(tensorInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + ret.outputExpected = input; + + return ret; +} diff --git a/src/armnn/backends/test/TensorCopyUtils.cpp b/src/armnn/backends/test/TensorCopyUtils.cpp new file mode 100644 index 0000000000..e15c12a76f --- /dev/null +++ b/src/armnn/backends/test/TensorCopyUtils.cpp @@ -0,0 +1,152 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+//
+
+#include <algorithm>
+#include <cstring>
+#include <boost/cast.hpp>
+
+#include "TensorCopyUtils.hpp"
+
+#ifdef ARMCOMPUTECL_ENABLED
+#include "backends/ClTensorHandle.hpp"
+#endif
+
+#if ARMCOMPUTENEON_ENABLED
+#include "backends/NeonTensorHandle.hpp"
+#endif
+
+#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED
+#include "backends/ArmComputeTensorUtils.hpp"
+#endif
+
+#include "backends/CpuTensorHandle.hpp"
+
+void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem)
+{
+    switch (tensorHandle->GetType())
+    {
+        case armnn::ITensorHandle::Cpu:
+        {
+            auto handle = boost::polymorphic_downcast<armnn::ScopedCpuTensorHandle*>(tensorHandle);
+            memcpy(handle->GetTensor<void>(), mem, handle->GetTensorInfo().GetNumBytes());
+            break;
+        }
+#ifdef ARMCOMPUTECL_ENABLED
+        case armnn::ITensorHandle::CL:
+        {
+            using armnn::armcomputetensorutils::CopyArmComputeITensorData;
+            auto handle = boost::polymorphic_downcast<armnn::IClTensorHandle*>(tensorHandle);
+            handle->Map(true);
+            switch (handle->GetDataType())
+            {
+                case arm_compute::DataType::F32:
+                    CopyArmComputeITensorData(static_cast<const float*>(mem), handle->GetTensor());
+                    break;
+                case arm_compute::DataType::QASYMM8:
+                    CopyArmComputeITensorData(static_cast<const uint8_t*>(mem), handle->GetTensor());
+                    break;
+                default:
+                {
+                    throw armnn::UnimplementedException();
+                }
+            }
+            handle->UnMap();
+            break;
+        }
+#endif
+#if ARMCOMPUTENEON_ENABLED
+        case armnn::ITensorHandle::Neon:
+        {
+            using armnn::armcomputetensorutils::CopyArmComputeITensorData;
+            auto handle = boost::polymorphic_downcast<armnn::INeonTensorHandle*>(tensorHandle);
+            switch (handle->GetDataType())
+            {
+                case arm_compute::DataType::F32:
+                    CopyArmComputeITensorData(static_cast<const float*>(mem), handle->GetTensor());
+                    break;
+                case arm_compute::DataType::QASYMM8:
+                    CopyArmComputeITensorData(static_cast<const uint8_t*>(mem), handle->GetTensor());
+                    break;
+                default:
+                {
+                    throw armnn::UnimplementedException();
+                }
+            }
+            break;
+        }
+#endif
+        default:
+        {
+            throw armnn::UnimplementedException();
+        }
+    }
+}
+
+void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle)
+{
+    switch (tensorHandle->GetType())
+    {
+        case armnn::ITensorHandle::Cpu:
+        {
+            auto handle = boost::polymorphic_downcast<const armnn::ScopedCpuTensorHandle*>(tensorHandle);
+            memcpy(mem, handle->GetTensor<void>(), handle->GetTensorInfo().GetNumBytes());
+            break;
+        }
+#ifdef ARMCOMPUTECL_ENABLED
+        case armnn::ITensorHandle::CL:
+        {
+            using armnn::armcomputetensorutils::CopyArmComputeITensorData;
+            auto handle = boost::polymorphic_downcast<const armnn::IClTensorHandle*>(tensorHandle);
+            const_cast<armnn::IClTensorHandle*>(handle)->Map(true);
+            switch (handle->GetDataType())
+            {
+                case arm_compute::DataType::F32:
+                    CopyArmComputeITensorData(handle->GetTensor(), static_cast<float*>(mem));
+                    break;
+                case arm_compute::DataType::QASYMM8:
+                    CopyArmComputeITensorData(handle->GetTensor(), static_cast<uint8_t*>(mem));
+                    break;
+                default:
+                {
+                    throw armnn::UnimplementedException();
+                }
+            }
+            const_cast<armnn::IClTensorHandle*>(handle)->UnMap();
+            break;
+        }
+#endif
+#if ARMCOMPUTENEON_ENABLED
+        case armnn::ITensorHandle::Neon:
+        {
+            using armnn::armcomputetensorutils::CopyArmComputeITensorData;
+            auto handle = boost::polymorphic_downcast<const armnn::INeonTensorHandle*>(tensorHandle);
+            switch (handle->GetDataType())
+            {
+                case arm_compute::DataType::F32:
+                    CopyArmComputeITensorData(handle->GetTensor(), static_cast<float*>(mem));
+                    break;
+                case arm_compute::DataType::QASYMM8:
+
CopyArmComputeITensorData(handle->GetTensor(), static_cast<uint8_t*>(mem)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + break; + } +#endif + default: + { + throw armnn::UnimplementedException(); + } + } +} + +void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) +{ + tensorHandle->Allocate(); + CopyDataToITensorHandle(tensorHandle, mem); +} diff --git a/src/armnn/backends/test/TensorCopyUtils.hpp b/src/armnn/backends/test/TensorCopyUtils.hpp new file mode 100644 index 0000000000..360eec61df --- /dev/null +++ b/src/armnn/backends/test/TensorCopyUtils.hpp @@ -0,0 +1,14 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "armnn/Tensor.hpp" +#include "backends/ITensorHandle.hpp" + +void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem); + +void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle); + +void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem);
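The three helpers declared in TensorCopyUtils.hpp above are the glue used by every layer test in this patch: create a backend tensor handle, copy host data in, run the workload, copy the result back out. A hedged sketch of that round trip follows (illustration only, using the reference backend already exercised in these tests; the CopyRoundTripExample name is ours, not part of the patch):

#include <memory>
#include <vector>

#include "TensorCopyUtils.hpp"
#include "backends/RefWorkloadFactory.hpp"

// Illustrative only: the allocate / copy-in / copy-out pattern the tests above repeat.
void CopyRoundTripExample()
{
    armnn::RefWorkloadFactory factory;
    const armnn::TensorInfo info({ 1, 2, 2, 2 }, armnn::DataType::Float32);

    std::vector<float> in = { 1, 2, 3, 4, 5, 6, 7, 8 };
    std::vector<float> out(in.size());

    std::unique_ptr<armnn::ITensorHandle> handle = factory.CreateTensorHandle(info);
    AllocateAndCopyDataToITensorHandle(handle.get(), in.data()); // Allocate() then copy host -> handle
    CopyDataFromITensorHandle(out.data(), handle.get());         // copy handle -> host
}

In the tests themselves a workload->Execute() call sits between the two copies; the helpers only move the data.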
\ No newline at end of file diff --git a/src/armnn/backends/test/WorkloadDataValidation.cpp b/src/armnn/backends/test/WorkloadDataValidation.cpp new file mode 100644 index 0000000000..c3a9d40116 --- /dev/null +++ b/src/armnn/backends/test/WorkloadDataValidation.cpp @@ -0,0 +1,450 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/Workload.hpp> +#include <backends/RefWorkloads.hpp> +#include <backends/RefWorkloadFactory.hpp> + +#include <armnn/Exceptions.hpp> + +#include "WorkloadTestUtils.hpp" + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(WorkloadInfoValidation) + + + +BOOST_AUTO_TEST_CASE(QueueDescriptor_Validate_WrongNumOfInputsOutputs) +{ + InputQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + //invalid argument exception is expected, because no inputs and no outputs were defined + BOOST_CHECK_THROW(RefWorkloadFactory().CreateInput(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(RefPooling2dFloat32Workload_Validate_WrongDimTensor) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {2, 3, 4}; // <- invalid - input tensor has to be 4D + unsigned int outputShape[] = {2, 3, 4, 5}; + + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + inputTensorInfo = armnn::TensorInfo(3, inputShape, armnn::DataType::Float32); + + Pooling2dQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + + // invalid argument exception is expected, input tensor has to be 4D + BOOST_CHECK_THROW(RefPooling2dFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(SoftmaxQueueDescriptor_Validate_WrongInputHeight) +{ + unsigned int inputHeight = 1; + unsigned int inputWidth = 1; + unsigned int inputChannels = 4; + unsigned int inputNum = 2; + + unsigned int outputChannels = inputChannels; + unsigned int outputHeight = inputHeight + 1; //makes data invalid - Softmax expects height and width to be 1 + unsigned int outputWidth = inputWidth; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + SoftmaxQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + //invalid argument exception is expected, because height != 1 + BOOST_CHECK_THROW(RefSoftmaxFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputWidth = 1; + unsigned int outputHeight = 1; + unsigned int outputChannels = 3; + unsigned int outputNum 
= 2; + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + unsigned int weightsShape[] = { 1, 1, inputChannels, outputChannels }; + unsigned int biasShape[] = { 1, outputChannels, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + weightsDesc = armnn::TensorInfo(4, weightsShape, armnn::DataType::Float32); + biasesDesc = armnn::TensorInfo(4, biasShape, armnn::DataType::Float32); + + FullyConnectedQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + ScopedCpuTensorHandle weightTensor(weightsDesc); + ScopedCpuTensorHandle biasTensor(biasesDesc); + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + invalidData.m_Weight = &weightTensor; + invalidData.m_Bias = &biasTensor; + invalidData.m_Parameters.m_BiasEnabled = true; + invalidData.m_Parameters.m_TransposeWeightMatrix = false; + + + //invalid argument exception is expected, because not all required fields have been provided + //in particular inputsData[0], outputsData[0] and weightsData can not be null + BOOST_CHECK_THROW(RefFullyConnectedFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + + +BOOST_AUTO_TEST_CASE(NormalizationQueueDescriptor_Validate_WrongInputHeight) +{ + constexpr unsigned int inputNum = 5; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + constexpr unsigned int inputChannels = 3; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = inputHeight + 1; //makes data invalid - normalization requires + //input and output to have the same dimensions + constexpr unsigned int outputWidth = inputWidth; + + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + + armnn::NormalizationAlgorithmMethod normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + armnn::NormalizationAlgorithmChannel normChannel = armnn::NormalizationAlgorithmChannel::Across; + float alpha = 1.f; + float beta = 1.f; + float kappa = 1.f; + uint32_t normSize = 5; + + NormalizationQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + invalidData.m_Parameters.m_NormChannelType = normChannel; + invalidData.m_Parameters.m_NormMethodType = normMethod; + invalidData.m_Parameters.m_NormSize = normSize; + invalidData.m_Parameters.m_Alpha = alpha; + invalidData.m_Parameters.m_Beta = beta; + invalidData.m_Parameters.m_K = kappa; + + //invalid argument exception is expected, because input height != output height + 
BOOST_CHECK_THROW(RefNormalizationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow) +{ + constexpr unsigned int inputNum = 1; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + constexpr unsigned int inputChannels = 3; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = 18; + constexpr unsigned int outputWidth = inputWidth; + + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + SplitterQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // invalid since it has only 3 dimensions while the input tensor is 4d + std::vector<unsigned int> wOrigin = {0, 0, 0}; + armnn::SplitterQueueDescriptor::ViewOrigin window(wOrigin); + invalidData.m_ViewOrigins.push_back(window); + + BOOST_TEST_INFO("Invalid argument exception is expected, because split window dimensionality does not " + "match input."); + BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + // invalid since window extends past the boundary of input tensor + std::vector<unsigned int> wOrigin3 = {0, 0, 15, 0}; + armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); + invalidData.m_ViewOrigins[0] = window3; + BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2]+ outputHeight > inputHeight"); + BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + + std::vector<unsigned int> wOrigin4 = {0, 0, 0, 0}; + armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); + invalidData.m_ViewOrigins[0] = window4; + + std::vector<unsigned int> wOrigin5 = {1, 16, 20, 2}; + armnn::SplitterQueueDescriptor::ViewOrigin window5(wOrigin4); + invalidData.m_ViewOrigins.push_back(window5); + + BOOST_TEST_INFO("Invalid exception due to number of split windows not matching number of outputs."); + BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + + +BOOST_AUTO_TEST_CASE(MergerQueueDescriptor_Validate_WrongWindow) +{ + constexpr unsigned int inputNum = 1; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + + constexpr unsigned int outputNum = 1; + constexpr unsigned int outputChannels = 3; + constexpr unsigned int outputHeight = 32; + constexpr unsigned int outputWidth = 24; + + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + MergerQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + 
AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // invalid since it has only 3 dimensions while the input tensor is 4d + std::vector<unsigned int> wOrigin = {0, 0, 0}; + armnn::MergerQueueDescriptor::ViewOrigin window(wOrigin); + invalidData.m_ViewOrigins.push_back(window); + + BOOST_TEST_INFO("Invalid argument exception is expected, because merge window dimensionality does not " + "match input."); + BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + // invalid since window extends past the boundary of output tensor + std::vector<unsigned int> wOrigin3 = {0, 0, 15, 0}; + armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); + invalidData.m_ViewOrigins[0] = window3; + BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2]+ inputHeight > outputHeight"); + BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + + std::vector<unsigned int> wOrigin4 = {0, 0, 0, 0}; + armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); + invalidData.m_ViewOrigins[0] = window4; + + std::vector<unsigned int> wOrigin5 = {1, 16, 20, 2}; + armnn::MergerQueueDescriptor::ViewOrigin window5(wOrigin4); + invalidData.m_ViewOrigins.push_back(window5); + + BOOST_TEST_INFO("Invalid exception due to number of merge windows not matching number of inputs."); + BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputNumbers) +{ + armnn::TensorInfo input1TensorInfo; + armnn::TensorInfo input2TensorInfo; + armnn::TensorInfo input3TensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {1, 1, 1, 1}; + + input1TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + input2TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + input3TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + AdditionQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // too few inputs + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); + + // correct + BOOST_CHECK_NO_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo)); + + AddInputToWorkload(invalidData, invalidInfo, input3TensorInfo, nullptr); + + // too many inputs + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes) +{ + armnn::TensorInfo input1TensorInfo; + armnn::TensorInfo input2TensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape1[] = {1, 1, 2, 1}; + unsigned int shape2[] = {1, 1, 3, 2}; + + // Incompatible shapes even with broadcasting + { + input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + input2TensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + + AdditionQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, 
invalidInfo, input1TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } + + // Output size not compatible with input sizes + { + input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + input2TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32); + + AdditionQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // output differs + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } +} + +BOOST_AUTO_TEST_CASE(MultiplicationQueueDescriptor_Validate_InputTensorDimensionMismatch) +{ + armnn::TensorInfo input0TensorInfo; + armnn::TensorInfo input1TensorInfo; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int input0Shape[] = { 2, 2, 4, 4 }; + constexpr std::size_t dimensionCount = std::extent<decltype(input0Shape)>::value; + + // Check dimension consistency for input tensors + for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex) + { + unsigned int input1Shape[dimensionCount]; + for (unsigned int i = 0; i < dimensionCount; ++i) + { + input1Shape[i] = input0Shape[i]; + } + + ++input1Shape[dimIndex]; + + input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + input1TensorInfo = armnn::TensorInfo(dimensionCount, input1Shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + + MultiplicationQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + + BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } + + // Check dimension consistency for input and output tensors + for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex) + { + unsigned int outputShape[dimensionCount]; + for (unsigned int i = 0; i < dimensionCount; ++i) + { + outputShape[i] = input0Shape[i]; + } + + ++outputShape[dimIndex]; + + input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + input1TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(dimensionCount, outputShape, armnn::DataType::Float32); + + MultiplicationQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + + BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } +} + +BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo 
outputTensorInfo; + + // The input and output shapes should have the same number of elements, but these don't + unsigned int inputShape[] = { 1, 1, 2, 3 }; + unsigned int outputShape[] = { 1, 1, 1, 2 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + ReshapeQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // InvalidArgumentException is expected, because the number of elements don't match + BOOST_CHECK_THROW(RefReshapeFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/WorkloadTestUtils.hpp b/src/armnn/backends/test/WorkloadTestUtils.hpp new file mode 100644 index 0000000000..bac958f57c --- /dev/null +++ b/src/armnn/backends/test/WorkloadTestUtils.hpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/Tensor.hpp> +#include <backends/WorkloadInfo.hpp> + +namespace armnn +{ +class ITensorHandle; +} + +template <typename QueueDescriptor> +void AddInputToWorkload(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Inputs.push_back(tensorHandle); + info.m_InputTensorInfos.push_back(tensorInfo); +} + +template <typename QueueDescriptor> +void AddOutputToWorkload(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Outputs.push_back(tensorHandle); + info.m_OutputTensorInfos.push_back(tensorInfo); +} + +template <typename QueueDescriptor> +void SetWorkloadInput(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + unsigned int index, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Inputs[index] = tensorHandle; + info.m_InputTensorInfos[index] = tensorInfo; +} + +template <typename QueueDescriptor> +void SetWorkloadOutput(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + unsigned int index, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Outputs[index] = tensorHandle; + info.m_OutputTensorInfos[index] = tensorInfo; +}
\ No newline at end of file diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp new file mode 100644 index 0000000000..70f78d44af --- /dev/null +++ b/src/armnn/optimizations/All.hpp @@ -0,0 +1,11 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "OptimizeInversePermutes.hpp" +#include "PermuteAsReshape.hpp" +#include "OptimizeConsecutiveReshapes.hpp" +#include "SquashEqualSiblings.hpp" +#include "MovePermuteUp.hpp" diff --git a/src/armnn/optimizations/MovePermuteUp.hpp b/src/armnn/optimizations/MovePermuteUp.hpp new file mode 100644 index 0000000000..8c59986762 --- /dev/null +++ b/src/armnn/optimizations/MovePermuteUp.hpp @@ -0,0 +1,82 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Optimization.hpp" +#include "Permute.hpp" + +namespace armnn +{ +namespace optimizations +{ +class MovePermuteUpImpl +{ +public: + /// Run for every connection between a base Layer (any) and a child PermuteLayer. If the type + /// of the base layer allows it, it moves the permutation to the inputs of the base layer. + /// I.e., adds equivalent permutations before the inputs of the base layer and moves the + /// connections in the output of the child permute layer to the output of the base layer. + void Run(Graph& graph, InputSlot& connection) const + { + OutputSlot& baseOutput = *connection.GetConnectedOutputSlot(); + + if (baseOutput.GetNumConnections() == 1U) + { + Layer& base = baseOutput.GetOwningLayer(); + + if (CanMovePermuteToInputs(base)) + { + auto permute = boost::polymorphic_downcast<PermuteLayer*>(&connection.GetOwningLayer()); + const PermutationVector& perm = permute->GetPermutation(); + + // Insert an equivalent permute before every input of the base layer. + for (auto baseInput = base.BeginInputSlots(); baseInput != base.EndInputSlots(); ++baseInput) + { + // Insert new permute layer. + const std::string name = std::string("moved_up-") + permute->GetName(); + PermuteLayer& permLayer = *graph.InsertNewLayer<PermuteLayer>(*baseInput, perm, name.c_str()); + + // Set output tensor info for the new layer. + OutputSlot& parentOutput = *permLayer.GetInputSlot(0).GetConnectedOutputSlot(); + const TensorInfo permOutInfo = armnnUtils::Permuted(parentOutput.GetTensorInfo(), perm); + permLayer.GetOutputHandler().SetTensorInfo(permOutInfo); + } + + // Set permuted output tensor info + const TensorInfo& childOutInfo = permute->GetOutputHandler().GetTensorInfo(); + base.GetOutputHandler().SetTensorInfo(childOutInfo); + + // Bypass permute. It will be removed as it's left unconnected. 
+ permute->GetOutputSlot().MoveAllConnections(base.GetOutputSlot()); + } + } + } + +protected: + MovePermuteUpImpl() = default; + ~MovePermuteUpImpl() = default; + +private: + static bool CanMovePermuteToInputs(const Layer& base) + { + switch (base.GetType()) + { + case LayerType::Activation: + case LayerType::Addition: + case LayerType::FakeQuantization: + case LayerType::Floor: + case LayerType::MemCopy: + case LayerType::Multiplication: + return true; + default: + return false; + } + } +}; + +using MovePermuteUp = OptimizeForConnection<Layer, PermuteLayer, MovePermuteUpImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/Optimization.hpp b/src/armnn/optimizations/Optimization.hpp new file mode 100644 index 0000000000..89e03ff88d --- /dev/null +++ b/src/armnn/optimizations/Optimization.hpp @@ -0,0 +1,123 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Graph.hpp" +#include "LayersFwd.hpp" + +namespace armnn +{ + +class Optimization +{ +public: + virtual void Run(Graph& graph, Graph::Iterator& pos) const = 0; +protected: + ~Optimization() = default; +}; + +// Wrappers +// The implementation of the following wrappers make use of the CRTP C++ idiom +// (curiously recurring template pattern). +// For details, see https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern + +/// Wrapper Optimization base class that calls Wrapped::Run for every layer of type BaseType. +/// - Wrapped class mustn't remove the base layer. +/// - Base layer is removed if left unconnected after applying the wrapped optimization. +template <typename BaseType, typename Wrapped> +class OptimizeForTypeImpl : public armnn::Optimization, public Wrapped +{ +public: + using Wrapped::Wrapped; + + void Run(Graph& graph, Graph::Iterator& pos) const override + { + Layer* const base = *pos; + + if (base->GetType() == LayerEnumOf<BaseType>()) + { + Wrapped::Run(graph, *boost::polymorphic_downcast<BaseType*>(base)); + } + } + +protected: + ~OptimizeForTypeImpl() = default; +}; + +/// Specialization that calls Wrapped::Run for any layer type +template <typename Wrapped> +class OptimizeForTypeImpl<Layer, Wrapped> : public armnn::Optimization, public Wrapped +{ +public: + using Wrapped::Wrapped; + + void Run(Graph& graph, Graph::Iterator& pos) const override + { + Wrapped::Run(graph, **pos); + } + +protected: + ~OptimizeForTypeImpl() = default; +}; + +template <typename BaseType, typename Wrapped> +class OptimizeForType final : public OptimizeForTypeImpl<BaseType, Wrapped> +{ +public: + using OptimizeForTypeImpl<BaseType, Wrapped>::OptimizeForTypeImpl; +}; + +/// Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. +/// - Wrapped class mustn't remove the base layer. +/// - Wrapped class mustn't affect existing connections in the same output. It might add new ones. +/// - Base and children layers are removed if left unconnected after applying the wrapped optimization. 
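/// For example, this wrapper is instantiated elsewhere in this change as:
///   using OptimizeConsecutiveReshapes = OptimizeForConnection<ReshapeLayer, ReshapeLayer, OptimizeConsecutiveReshapesImpl>;
///   using OptimizeInversePermutes = OptimizeForConnection<PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl>;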
+template <typename BaseType, typename ChildType, typename Wrapped> +class OptimizeForConnectionImpl : public Wrapped +{ +public: + using Wrapped::Wrapped; + + void Run(Graph& graph, BaseType& base) const + { + for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) + { + for (auto&& childInput : output->GetConnections()) + { + if (childInput->GetOwningLayer().GetType() == LayerEnumOf<ChildType>()) + { + Wrapped::Run(graph, *childInput); + } + } + + // Remove unconnected children + for (unsigned int i = 0; i < output->GetNumConnections();) + { + Layer* child = &output->GetConnection(i)->GetOwningLayer(); + + if (child->IsOutputUnconnected()) + { + graph.EraseLayer(child); + } + else + { + ++i; + } + } + } + } + +protected: + ~OptimizeForConnectionImpl() = default; +}; + +template <typename BaseType, typename ChildType, typename Wrapped> +class OptimizeForConnection final + : public OptimizeForTypeImpl<BaseType, OptimizeForConnectionImpl<BaseType, ChildType, Wrapped>> +{ +public: + using OptimizeForTypeImpl<BaseType, OptimizeForConnectionImpl<BaseType, ChildType, Wrapped>>::OptimizeForTypeImpl; +}; + +} // namespace armnn diff --git a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp new file mode 100644 index 0000000000..deb49c6884 --- /dev/null +++ b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class OptimizeConsecutiveReshapesImpl +{ +public: + /// Run for every connection between a base RashapeLayer and a child ReshapeLayer. + /// Inserts an equivalent ReshapeLayer that bypasses both for that connection. + void Run(Graph& graph, InputSlot& connection) const + { + auto& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + auto& child = connection.GetOwningLayer(); + + BOOST_ASSERT(base.GetType() == LayerType::Reshape); + BOOST_ASSERT(child.GetType() == LayerType::Reshape); + + OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot(); + + const TensorInfo& inInfo = parentOut->GetTensorInfo(); + const TensorInfo& outInfo = child.GetOutputHandler().GetTensorInfo(); + + if (inInfo.GetShape() != outInfo.GetShape()) + { + // Insert equivalent reshape before base layer + const std::string name = std::string("merged-") + base.GetName() + std::string("-with-") + child.GetName(); + const ReshapeDescriptor descriptor{outInfo.GetShape()}; + auto& newReshape = *graph.InsertNewLayer<ReshapeLayer>(base.GetInputSlot(0), descriptor, name.c_str()); + // Set tensor info for new layer + newReshape.GetOutputHandler().SetTensorInfo(outInfo); + // Reconnect base with original parent + newReshape.GetOutputSlot().MoveAllConnections(*parentOut); + // Parent is now the new layer + parentOut = &newReshape.GetOutputSlot(); + } + + // Move connections in child output to parent layer. + // Child layer will be removed as it's left unconnected. + // Base layer will be removed if left unconnected. 
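        // Schematically:
        //   parent -> reshapeA -> reshapeB -> consumers
        // collapses to
        //   parent -> merged-reshapeA-with-reshapeB -> consumers   (when the shapes differ)
        // or, when parent's output shape already matches reshapeB's output shape,
        //   parent -> consumers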
+ child.GetOutputSlot().MoveAllConnections(*parentOut); + } + +protected: + OptimizeConsecutiveReshapesImpl() = default; + ~OptimizeConsecutiveReshapesImpl() = default; +}; + +using OptimizeConsecutiveReshapes = OptimizeForConnection<ReshapeLayer, ReshapeLayer, OptimizeConsecutiveReshapesImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/OptimizeInversePermutes.hpp b/src/armnn/optimizations/OptimizeInversePermutes.hpp new file mode 100644 index 0000000000..63820cb7d3 --- /dev/null +++ b/src/armnn/optimizations/OptimizeInversePermutes.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class OptimizeInversePermutesImpl +{ +public: + /// Run for every connection between a base PermuteLayer and a child PermuteLayer. + /// Bypasses both layers for that connection if one is the inverse of the other. + void Run(Graph& graph, InputSlot& connection) const + { + Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + auto child = boost::polymorphic_downcast<PermuteLayer*>(&connection.GetOwningLayer()); + + if (child->IsInverse(*boost::polymorphic_downcast<PermuteLayer*>(&base))) + { + // Bypass both layers. Child will be removed as it's left unconnected. + // Base layer will be removed if left unconnected. + child->GetOutputSlot().MoveAllConnections(*base.GetInputSlot(0).GetConnectedOutputSlot()); + } + } + +protected: + OptimizeInversePermutesImpl() = default; + ~OptimizeInversePermutesImpl() = default; +}; + +using OptimizeInversePermutes = OptimizeForConnection<PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/PermuteAsReshape.hpp b/src/armnn/optimizations/PermuteAsReshape.hpp new file mode 100644 index 0000000000..a8e4c2df5e --- /dev/null +++ b/src/armnn/optimizations/PermuteAsReshape.hpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class PermuteAsReshapeImpl +{ +public: + /// Run for every PermuteLayer. Replaces it with a ReshapeLayer if they are equivalent. + void Run(Graph& graph, PermuteLayer& permute) const + { + if (IsReshape(permute)) + { + const TensorInfo& outInfo = permute.GetOutputHandler().GetTensorInfo(); + + const std::string name = std::string("as_reshape-") + permute.GetName(); + const ReshapeDescriptor descriptor{outInfo.GetShape()}; + // Insert so layers don't need to be re-sorted + auto reshape = graph.InsertNewLayer<ReshapeLayer>(permute.GetInputSlot(0), descriptor, name.c_str()); + reshape->GetOutputHandler().SetTensorInfo(outInfo); + + // Bypass permute. It will be deleted since it's left unconnected. 
+ permute.GetOutputSlot().MoveAllConnections(reshape->GetOutputSlot()); + } + } + +protected: + PermuteAsReshapeImpl() = default; + ~PermuteAsReshapeImpl() = default; + +private: + static bool IsReshape(const PermuteLayer& layer) + { + const TensorShape& outShape = layer.GetOutputHandler().GetTensorInfo().GetShape(); + const PermutationVector& permutation = layer.GetPermutation(); + + const unsigned int numDimensions = permutation.GetSize(); + + unsigned int lastGtOne = 0; + while ((lastGtOne < numDimensions) && (outShape[(permutation[lastGtOne])] == 1U)) + { + ++lastGtOne; + } + + bool isReshape = true; + for (unsigned int i = lastGtOne + 1U; isReshape && (i < numDimensions); ++i) + { + if (outShape[permutation[i]] > 1U) + { + isReshape = permutation[lastGtOne] < permutation[i]; + lastGtOne = i; + } + } + + return isReshape; + } +}; + +using PermuteAsReshape = OptimizeForType<PermuteLayer, PermuteAsReshapeImpl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp new file mode 100644 index 0000000000..2dfe91fdcc --- /dev/null +++ b/src/armnn/optimizations/SquashEqualSiblings.hpp @@ -0,0 +1,57 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Optimization.hpp" + +namespace armnn +{ +namespace optimizations +{ + +template <typename Comparable> +class SquashEqualSiblingsImpl +{ +public: + /// Run for every connection between a base Layer (any) and a child ComparableLayer. + /// For all siblings of the child layer that compare equal to it, bypasses and removes + /// them. I.e., moves the connections in the outputs of the siblings to the outputs of + /// the child layer, so the siblings are left unconnected (and later removed). + void Run(Graph& graph, InputSlot& connection) const + { + auto& child = connection.GetOwningLayer(); + + if (!child.IsOutputUnconnected()) + { + OutputSlot& baseOutput = *connection.GetConnectedOutputSlot(); + auto& comparableChild = *boost::polymorphic_downcast<Comparable*>(&child); + + for (auto&& it : baseOutput.GetConnections()) + { + Layer& sibling = it->GetOwningLayer(); + if ((&sibling != &child) && comparableChild.IsEqual(sibling)) + { + // Bypass sibling. It will be removed as it's left unconnected. + auto siblingOut = sibling.BeginOutputSlots(); + for (auto childOut = child.BeginOutputSlots(); childOut != child.EndOutputSlots(); ++childOut) + { + siblingOut->MoveAllConnections(*childOut); + ++siblingOut; + } + } + } + } + } + +protected: + SquashEqualSiblingsImpl() = default; + ~SquashEqualSiblingsImpl() = default; +}; + +using SquashEqualPermuteSiblings = OptimizeForConnection<Layer, PermuteLayer, SquashEqualSiblingsImpl<PermuteLayer>>; +using SquashEqualReshapeSiblings = OptimizeForConnection<Layer, ReshapeLayer, SquashEqualSiblingsImpl<ReshapeLayer>>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp new file mode 100644 index 0000000000..d8aa208eb7 --- /dev/null +++ b/src/armnn/test/CreateWorkload.hpp @@ -0,0 +1,814 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <boost/test/unit_test.hpp> + +#include <boost/cast.hpp> + +#include "backends/WorkloadData.hpp" +#include "Layers.hpp" +#include "Graph.hpp" + +#include <utility> + +#include "backends/CpuTensorHandle.hpp" + +using namespace armnn; + +namespace +{ + +using namespace std; + +// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type +template<typename Workload> +std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const IWorkloadFactory& factory) +{ + std::unique_ptr<IWorkload> workload = layer.CreateWorkload(graph, factory); + BOOST_TEST(workload.get() == boost::polymorphic_downcast<Workload*>(workload.get()), + "Cannot convert to derived class"); + std::string reasonIfUnsupported; + BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported)); + return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release())); +} + +// connects two layers +void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) +{ + from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); + from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); +} + +// helper function to create tensor handlers for workloads, assuming they all use the same factory +void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory) +{ + for (auto&& layer : graph.TopologicalSort()) + { + layer->CreateTensorHandles(graph, factory); + } +} + +///////////////////////////////////////////////////////////////////////////////////////////// +// The following functions are called by backends/test/CreateWorkload*.cpp +// They build very simple graphs, and then create a workload. +// Some checks are performed on the workload to ensure parameters have been passed correctly. +// They return the created workloads so that backend-specific checks can be performed. 
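// A minimal usage sketch from such a backend test (the workload type and factory are
// illustrative; each backend substitutes its own):
//   armnn::Graph graph;
//   armnn::RefWorkloadFactory factory;
//   auto workload = CreateActivationWorkloadTest<RefActivationFloat32Workload>(factory, graph);
//   // ... backend-specific checks on workload->GetData() ...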
+///////////////////////////////////////////////////////////////////////////////////////////// + +template <typename ActivationWorkload> +std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + ActivationDescriptor layerDesc; + layerDesc.m_Function = ActivationFunction::Abs; + layerDesc.m_A = 3.5f; + layerDesc.m_B = -10.0f; + + ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo tensorInfo({1, 1}, ActivationWorkload::ms_DataType); + + Connect(input, layer, tensorInfo); + Connect(layer, output, tensorInfo); + + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, graph, factory); + + ActivationQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_A == 3.5f); + BOOST_TEST(queueDescriptor.m_Parameters.m_B == -10.0f); + BOOST_TEST((queueDescriptor.m_Parameters.m_Function == ActivationFunction::Abs)); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename AdditionWorkload> +std::unique_ptr<AdditionWorkload> CreateAdditionWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + Layer* const layer = graph.AddLayer<AdditionLayer>("layer"); + + // create extra layers + Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1"); + Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo tensorInfo({2, 3}, AdditionWorkload::ms_DataType); + Connect(input1, layer, tensorInfo, 0, 0); + Connect(input2, layer, tensorInfo, 0, 1); + Connect(layer, output, tensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<AdditionWorkload>(*layer, graph, factory); + + AdditionQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 2); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename BatchNormalizationFloat32Workload> +std::unique_ptr<BatchNormalizationFloat32Workload> CreateBatchNormalizationWorkloadTest( + armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + // create the layer we're testing + BatchNormalizationDescriptor layerDesc; + layerDesc.m_Eps = 0.05f; + + BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer"); + + armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32); + layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(weightInfo); + layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(weightInfo); + layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(weightInfo); + layer->m_Gamma = std::make_unique<ScopedCpuTensorHandle>(weightInfo); + layer->m_Mean->Allocate(); + layer->m_Variance->Allocate(); + layer->m_Beta->Allocate(); + layer->m_Gamma->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + 
Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32); + Connect(input, layer, tensorInfo); + Connect(layer, output, tensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<BatchNormalizationFloat32Workload>(*layer, graph, factory); + + BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Parameters.m_Eps == 0.05f); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + BOOST_TEST((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); + BOOST_TEST((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); + BOOST_TEST((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); + BOOST_TEST((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename Convolution2dWorkload> +std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + Convolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer"); + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3}, + Convolution2dWorkload::ms_DataType)); + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType))); + + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({2, 3, 8, 16}, Convolution2dWorkload::ms_DataType)); + Connect(layer, output, TensorInfo({2, 2, 2, 10}, Convolution2dWorkload::ms_DataType)); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory); + + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 4); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true); + + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3}, + Convolution2dWorkload::ms_DataType))); + BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == + TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType)))); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename Convolution2dWorkload> +std::unique_ptr<Convolution2dWorkload> 
CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + Convolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 1; + layerDesc.m_PadRight = 1; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 1; + layerDesc.m_StrideY = 1; + layerDesc.m_BiasEnabled = true; + + Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer"); + + float inputsQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0; + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({ 2, 3, 3, 3 }, + Convolution2dWorkload::ms_DataType, inputsQScale)); + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({2, 3, 6, 6}, Convolution2dWorkload::ms_DataType, inputsQScale)); + Connect(layer, output, TensorInfo({2, 2, 6, 6}, Convolution2dWorkload::ms_DataType, outputQScale)); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory); + + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true); + + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 3, 3}, + Convolution2dWorkload::ms_DataType, inputsQScale))); + BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() + == TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale))); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename DepthwiseConvolution2dFloat32Workload> +std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest( + armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + // create the layer we're testing + DepthwiseConvolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer"); + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({3, 3, 5, 3}, DataType::Float32)); + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({9}, DataType::Float32)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + 
// connect up + Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); + Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32)); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, graph, factory); + + DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 4); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true); + + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({3, 3, 5, 3}, DataType::Float32))); + BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({9}, DataType::Float32))); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename FullyConnectedWorkload> +std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + FullyConnectedDescriptor layerDesc; + layerDesc.m_BiasEnabled = true; + layerDesc.m_TransposeWeightMatrix = true; + + FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer"); + + float inputsQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 
2.0f : 0.0; + + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20}, + FullyConnectedWorkload::ms_DataType, inputsQScale, 0)); + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7}, + GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({3, 1, 4, 5}, FullyConnectedWorkload::ms_DataType, inputsQScale)); + Connect(layer, output, TensorInfo({3, 7}, FullyConnectedWorkload::ms_DataType, outputQScale)); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, graph, factory); + + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true); + BOOST_TEST(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true); + + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == + TensorInfo({7, 20}, FullyConnectedWorkload::ms_DataType, inputsQScale))); + BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == + TensorInfo({7}, GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale))); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename MultiplicationWorkload> +std::unique_ptr<MultiplicationWorkload> CreateMultiplicationWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + Layer* const layer = graph.AddLayer<MultiplicationLayer>("layer"); + + // create extra layers + Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1"); + Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo tensorInfo({2, 3}, MultiplicationWorkload::ms_DataType); + Connect(input1, layer, tensorInfo, 0, 0); + Connect(input2, layer, tensorInfo, 0, 1); + Connect(layer, output, tensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<MultiplicationWorkload>(*layer, graph, factory); + + MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 2); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename NormalizationFloat32Workload> +std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + NormalizationDescriptor layerDesc; + layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across; + layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness; + layerDesc.m_NormSize = 3; + layerDesc.m_Alpha = 0.5f; + layerDesc.m_Beta = -1.0f; + layerDesc.m_K = 0.2f; + + NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({3, 5, 
5, 1}, armnn::DataType::Float32)); + Connect(layer, output, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32)); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<NormalizationFloat32Workload>(*layer, graph, factory); + + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across)); + BOOST_TEST((queueDescriptor.m_Parameters.m_NormMethodType == NormalizationAlgorithmMethod::LocalBrightness)); + BOOST_TEST(queueDescriptor.m_Parameters.m_NormSize == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_Alpha == 0.5f); + BOOST_TEST(queueDescriptor.m_Parameters.m_Beta == -1.0f); + BOOST_TEST(queueDescriptor.m_Parameters.m_K == 0.2f); + + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename Pooling2dWorkload> +std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + Pooling2dDescriptor layerDesc; + layerDesc.m_PoolType = PoolingAlgorithm::Average; + layerDesc.m_PoolWidth = 3; + layerDesc.m_PoolHeight = 3; + layerDesc.m_PadLeft = 2; + layerDesc.m_PadRight = 2; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 3; + layerDesc.m_OutputShapeRounding = OutputShapeRounding::Floor; + + Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({3, 2, 5, 5}, Pooling2dWorkload::ms_DataType)); + Connect(layer, output, TensorInfo({3, 2, 2, 4}, Pooling2dWorkload::ms_DataType)); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, graph, factory); + + Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST((queueDescriptor.m_Parameters.m_PoolType == PoolingAlgorithm::Average)); + BOOST_TEST((queueDescriptor.m_Parameters.m_OutputShapeRounding == OutputShapeRounding::Floor)); + BOOST_TEST(queueDescriptor.m_Parameters.m_PoolWidth == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_PoolHeight == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2); + BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 3); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 2); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 2); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1); + BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1); + + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename SoftmaxWorkload> +std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + SoftmaxDescriptor softmaxDescriptor; + Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // 
connect up + armnn::TensorInfo tensorInfo({4, 1}, SoftmaxWorkload::ms_DataType); + Connect(input, layer, tensorInfo); + Connect(layer, output, tensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, graph, factory); + + SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template<typename SplitterWorkload> +std::unique_ptr<SplitterWorkload> + CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + // create the layer we're testing + ViewsDescriptor layerDesc(3, 2); + layerDesc.SetViewOriginCoord(0, 1, 2); // deliberately add these in a weird order + layerDesc.SetViewOriginCoord(2, 1, 0); + layerDesc.SetViewOriginCoord(1, 1, 3); + + Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer"); + + // add extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0"); + Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1"); + Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2"); + + // connect up + armnn::TensorInfo tensorInfo({1, 7}, SplitterWorkload::ms_DataType); + Connect(input, layer, tensorInfo); + + armnn::TensorInfo output0Info({1, 2}, SplitterWorkload::ms_DataType); + armnn::TensorInfo output1Info({1, 1}, SplitterWorkload::ms_DataType); + armnn::TensorInfo output2Info({1, 4}, SplitterWorkload::ms_DataType); + Connect(layer, output1, output1Info, 1, 0); // deliberately connect these up in a weird order + Connect(layer, output0, output0Info, 2, 0); + Connect(layer, output2, output2Info, 0, 0); + + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, graph, factory); + + SplitterQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 3); + BOOST_TEST(queueDescriptor.m_ViewOrigins.size() == 3); + + BOOST_TEST(queueDescriptor.m_ViewOrigins[0].m_Origin[0] == 0); + BOOST_TEST(queueDescriptor.m_ViewOrigins[1].m_Origin[0] == 0); + BOOST_TEST(queueDescriptor.m_ViewOrigins[2].m_Origin[0] == 0); + BOOST_TEST(queueDescriptor.m_ViewOrigins[0].m_Origin[1] == 2); + BOOST_TEST(queueDescriptor.m_ViewOrigins[1].m_Origin[1] == 3); + BOOST_TEST(queueDescriptor.m_ViewOrigins[2].m_Origin[1] == 0); + + // return so we can do extra, backend-specific tests + return workload; +} + +/// This function constructs a graph with both a splitter and a merger, and returns a pair of the workloads +template<typename SplitterWorkload, typename MergerWorkload> +std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>> + CreateSplitterMergerWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + static_assert(SplitterWorkload::ms_DataType == MergerWorkload::ms_DataType, + "Splitter and merger workloads must have the same data type"); + + armnn::TensorInfo inputTensorInfo({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType); + armnn::TensorInfo splitTensorInfo1({ 1, 1, 60, 10 }, SplitterWorkload::ms_DataType); + armnn::TensorInfo splitTensorInfo2({ 1, 1, 40, 10 }, SplitterWorkload::ms_DataType); + + //construct the graph + Layer* const input = graph.AddLayer<InputLayer>(0, 
"input"); + + armnn::ViewsDescriptor splitterViews(2); + splitterViews.SetViewOriginCoord(0, 0, 0); + splitterViews.SetViewOriginCoord(0, 1, 0); + splitterViews.SetViewOriginCoord(0, 2, 0); + splitterViews.SetViewOriginCoord(0, 3, 0); + + splitterViews.SetViewOriginCoord(1, 0, 0); + splitterViews.SetViewOriginCoord(1, 1, 0); + splitterViews.SetViewOriginCoord(1, 2, 60); + splitterViews.SetViewOriginCoord(1, 3, 0); + + Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter"); + + armnn::OriginsDescriptor mergerViews(2); + mergerViews.SetViewOriginCoord(0, 0, 0); + mergerViews.SetViewOriginCoord(0, 1, 0); + mergerViews.SetViewOriginCoord(0, 2, 0); + mergerViews.SetViewOriginCoord(0, 3, 0); + + mergerViews.SetViewOriginCoord(1, 0, 0); + mergerViews.SetViewOriginCoord(1, 1, 0); + mergerViews.SetViewOriginCoord(1, 2, 40); + mergerViews.SetViewOriginCoord(1, 3, 0); + + Layer* const merger = graph.AddLayer<MergerLayer>(mergerViews, "merger"); + + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // add connections + Connect(input, splitter, inputTensorInfo, 0, 0); + Connect(splitter, merger, splitTensorInfo1, 0, 1); // The splitter & merger are connected up + Connect(splitter, merger, splitTensorInfo2, 1, 0); // so that the outputs are flipped round + Connect(merger, output, inputTensorInfo, 0, 0); + + CreateTensorHandles(graph, factory); + + auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, graph, factory); + auto workloadMerger = MakeAndCheckWorkload<MergerWorkload>(*merger, graph, factory); + + return {std::move(workloadSplitter), std::move(workloadMerger)}; +} + + +/// This function constructs a graph with a splitter with two outputs. Each of the outputs is then +/// connected to two different activation layers +template<typename SplitterWorkload, typename ActivationWorkload> +void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph, + std::unique_ptr<SplitterWorkload>& wlSplitter, + std::unique_ptr<ActivationWorkload>& wlActiv0_0, + std::unique_ptr<ActivationWorkload>& wlActiv0_1, + std::unique_ptr<ActivationWorkload>& wlActiv1_0, + std::unique_ptr<ActivationWorkload>& wlActiv1_1) +{ + static_assert(SplitterWorkload::ms_DataType == ActivationWorkload::ms_DataType, + "Splitter and activation workloads must have the same data type"); + + armnn::TensorInfo inputTensorInfo({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType); + armnn::TensorInfo splitTensorInfo1({ 1, 1, 60, 10 }, SplitterWorkload::ms_DataType); + armnn::TensorInfo splitTensorInfo2({ 1, 1, 40, 10 }, SplitterWorkload::ms_DataType); + + //construct the graph + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + + armnn::ViewsDescriptor splitterViews(2); + splitterViews.SetViewOriginCoord(0, 0, 0); + splitterViews.SetViewOriginCoord(0, 1, 0); + splitterViews.SetViewOriginCoord(0, 2, 0); + splitterViews.SetViewOriginCoord(0, 3, 0); + + splitterViews.SetViewOriginCoord(1, 0, 0); + splitterViews.SetViewOriginCoord(1, 1, 0); + splitterViews.SetViewOriginCoord(1, 2, 60); + splitterViews.SetViewOriginCoord(1, 3, 0); + + Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter"); + + armnn::ActivationDescriptor activationDesc; + + Layer* const activ0_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_0"); + Layer* const activ0_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_1"); + Layer* const activ1_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_0"); + Layer* 
const activ1_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_1"); + + Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1"); + Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2"); + Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3"); + Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4"); + + // add connections + Connect(input, splitter, inputTensorInfo, 0, 0); + Connect(splitter, activ0_0, splitTensorInfo1, 0, 0); + Connect(splitter, activ0_1, splitTensorInfo1, 0, 0); + + Connect(splitter, activ1_0, splitTensorInfo2, 1, 0); + Connect(splitter, activ1_1, splitTensorInfo2, 1, 0); + + Connect(activ0_0, output1, splitTensorInfo1, 0, 0); + Connect(activ0_1, output2, splitTensorInfo1, 0, 0); + Connect(activ1_0, output3, splitTensorInfo2, 0, 0); + Connect(activ1_1, output4, splitTensorInfo2, 0, 0); + + CreateTensorHandles(graph, factory); + + auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, graph, factory); + auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, graph, factory); + auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, graph, factory); + auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, graph, factory); + auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, graph, factory); + + wlSplitter = std::move(workloadSplitter); + wlActiv0_0 = std::move(workloadActiv0_0); + wlActiv0_1 = std::move(workloadActiv0_1); + wlActiv1_0 = std::move(workloadActiv1_0); + wlActiv1_1 = std::move(workloadActiv1_1); +} + +template <typename ResizeBilinearWorkload> +std::unique_ptr<ResizeBilinearWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + TensorShape outputShape({ 2, 3, 2, 2 }); + ResizeBilinearDescriptor resizeDesc; + resizeDesc.m_TargetWidth = outputShape[3]; + resizeDesc.m_TargetHeight = outputShape[2]; + Layer* const layer = graph.AddLayer<ResizeBilinearLayer>(resizeDesc, "layer"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo inputTensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkload::ms_DataType); + armnn::TensorInfo outputTensorInfo(outputShape, ResizeBilinearWorkload::ms_DataType); + Connect(input, layer, inputTensorInfo); + Connect(layer, output, outputTensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<ResizeBilinearWorkload>(*layer, graph, factory); + + ResizeBilinearQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename L2NormalizationWorkload> +std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + Layer* const layer = graph.AddLayer<L2NormalizationLayer>("l2norm"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo inputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType); + armnn::TensorInfo outputTensorInfo({ 5, 20, 50, 67 }, 
L2NormalizationWorkload::ms_DataType); + Connect(input, layer, inputTensorInfo); + Connect(layer, output, outputTensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, graph, factory); + + L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +template <typename ReshapeWorkload> +std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // create the layer we're testing + TensorShape outputShape({ 1, 4 }); + ReshapeDescriptor reshapeDesc; + reshapeDesc.m_TargetShape = outputShape; + Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + armnn::TensorInfo inputTensorInfo({ 4, 1 }, ReshapeWorkload::ms_DataType); + armnn::TensorInfo outputTensorInfo(outputShape, ReshapeWorkload::ms_DataType); + Connect(input, layer, inputTensorInfo); + Connect(layer, output, outputTensorInfo); + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, graph, factory); + + ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // return so we can do extra, backend-specific tests + return workload; +} + +} diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp new file mode 100644 index 0000000000..a41a70755f --- /dev/null +++ b/src/armnn/test/CreateWorkloadClNeon.hpp @@ -0,0 +1,107 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include "CreateWorkload.hpp" +#include "backends/RefWorkloadFactory.hpp" + +#if ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#endif + +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonTensorHandle.hpp" +#endif + + +using namespace armnn; + +namespace +{ + +using namespace std; + +template<typename IComputeTensorHandle> +boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle* tensorHandle, + std::initializer_list<unsigned int> expectedDimensions) +{ + arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info(); + + auto infoNumDims = info->num_dimensions(); + auto numExpectedDims = expectedDimensions.size(); + if (infoNumDims != numExpectedDims) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different number of dimensions [" << info->num_dimensions() + << "!=" << expectedDimensions.size() << "]"; + return res; + } + + size_t i = info->num_dimensions() - 1; + + for (unsigned int expectedDimension : expectedDimensions) + { + if (info->dimension(i) != expectedDimension) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different dimension [" << info->dimension(i) << "!=" << expectedDimension << "]"; + return res; + } + + i--; + } + + return true; +} + +template<template <DataType> class CopyFromCpuWorkload, template <DataType> class CopyToCpuWorkload, + typename IComputeTensorHandle> +void CreateMemCopyWorkloads(IWorkloadFactory& factory) +{ + Graph graph; + RefWorkloadFactory refFactory; + + // create the layers we're testing + Layer* const layer1 = graph.AddLayer<MemCopyLayer>("layer1"); + Layer* const layer2 = graph.AddLayer<MemCopyLayer>("layer2"); + + // create extra layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // connect up + TensorInfo tensorInfo({2, 3}, DataType::Float32); + Connect(input, layer1, tensorInfo); + Connect(layer1, layer2, tensorInfo); + Connect(layer2, output, tensorInfo); + + input->CreateTensorHandles(graph, refFactory); + layer1->CreateTensorHandles(graph, factory); + layer2->CreateTensorHandles(graph, refFactory); + output->CreateTensorHandles(graph, refFactory); + + // make the workloads and check them + auto workload1 = MakeAndCheckWorkload<CopyFromCpuWorkload<DataType::Float32>>(*layer1, graph, factory); + auto workload2 = MakeAndCheckWorkload<CopyToCpuWorkload<DataType::Float32>>(*layer2, graph, refFactory); + + MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData(); + BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1); + auto inputHandle1 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor1.m_Inputs[0]); + auto outputHandle1 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]); + BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(outputHandle1, {2, 3})); + + + MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData(); + BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1); + auto inputHandle2 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]); + auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor2.m_Outputs[0]); + BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(inputHandle2, {2, 3})); + 
BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); +} + +}
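CompareTensorHandleShape above walks the arm_compute dimension indices from num_dimensions() - 1 down to 0 while reading expectedDimensions forwards, so the initializer list is written in the same order as the armnn TensorShape and matched against arm_compute's dimension indices in reverse. For the {2, 3} tensors used in CreateMemCopyWorkloads the predicate effectively checks:

    info->dimension(1) == 2;
    info->dimension(0) == 3;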
\ No newline at end of file diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp new file mode 100644 index 0000000000..77a1f071a8 --- /dev/null +++ b/src/armnn/test/EndToEndTest.cpp @@ -0,0 +1,411 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "armnn/Descriptors.hpp" +#include "armnn/IRuntime.hpp" +#include "armnn/INetwork.hpp" + +#include "backends/test/QuantizeHelper.hpp" +#include <boost/core/ignore_unused.hpp> + +BOOST_AUTO_TEST_SUITE(EndToEnd) + +namespace +{ +template<typename T> +bool IsFloatIterFunc(T iter) +{ + boost::ignore_unused(iter); + return IsFloatingPointIterator<T>::value; +} +} //namespace + +BOOST_AUTO_TEST_CASE(QuantizedHelper) +{ + std::vector<float> fArray; + BOOST_TEST(IsFloatIterFunc(fArray.begin()) == true); + BOOST_TEST(IsFloatIterFunc(fArray.cbegin()) == true); + + std::vector<double> dArray; + BOOST_TEST(IsFloatIterFunc(dArray.begin()) == true); + + std::vector<int> iArray; + BOOST_TEST(IsFloatIterFunc(iArray.begin()) == false); + + float floats[5]; + BOOST_TEST(IsFloatIterFunc(&floats[0]) == true); + + int ints[5]; + BOOST_TEST(IsFloatIterFunc(&ints[0]) == false); +} + +BOOST_AUTO_TEST_CASE(Unsigned8) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + + // build up the structure of the network + armnn::INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "input"); + IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); + softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationOffset(100); + inputTensorInfo.SetQuantizationScale(10000.0f); + input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(1.0f/255.0f); + softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + + // load it into the runtime + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // create structures for input & output + std::vector<uint8_t> inputData + { + 1, 10, 3, 200, 5 // some inputs - one of which is sufficiently larger than the others to saturate softmax + }; + std::vector<uint8_t> outputData(5); + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // check the results + BOOST_TEST(outputData[0] == 0); + BOOST_TEST(outputData[1] == 0); + BOOST_TEST(outputData[2] == 0); + BOOST_TEST(outputData[3] == 255); // softmax has been saturated + BOOST_TEST(outputData[4] == 0); +} + +template <typename T> +void ConstantUsageTest(armnn::Compute computeDevice, + const armnn::TensorInfo& commonTensorInfo, + const 
std::vector<T>& inputData, + const std::vector<T>& constantData, + const std::vector<T>& expectedOutputData) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(computeDevice)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData)); + IConnectableLayer* add = net->AddAdditionLayer(); + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + constant->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + + // load it into the runtime + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // create structures for input & output + std::vector<T> outputData(inputData.size()); + + InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // check the results + BOOST_TEST(outputData == expectedOutputData); +} + +static void ConstantUsageFloat32Test(armnn::Compute computeDevice) +{ + const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32); + + ConstantUsageTest(computeDevice, + commonTensorInfo, + std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // input + std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // const input + std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // expected output + ); +} + +static void ConstantUsageUint8Test(armnn::Compute computeDevice) +{ + armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8); + + const float scale = 0.023529f; + const int8_t offset = -43; + + commonTensorInfo.SetQuantizationScale(scale); + commonTensorInfo.SetQuantizationOffset(offset); + + ConstantUsageTest(computeDevice, + commonTensorInfo, + QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // input + QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // const input + QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // expected output + ); +} + +BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32) +{ + ConstantUsageFloat32Test(armnn::Compute::CpuRef); +} + +#if ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32) +{ + ConstantUsageFloat32Test(armnn::Compute::CpuAcc); +} +#endif + +#if ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32) +{ + ConstantUsageFloat32Test(armnn::Compute::GpuAcc); +} +#endif + +BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8) +{ + ConstantUsageUint8Test(armnn::Compute::CpuRef); +} + +BOOST_AUTO_TEST_CASE(TrivialAdd) +{ + // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp + + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + + 
// build up the structure of the network + armnn::INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input1 = net->AddInputLayer(0); + IConnectableLayer* input2 = net->AddInputLayer(1); + IConnectableLayer* add = net->AddAdditionLayer(); + IConnectableLayer* output = net->AddOutputLayer(0); + + input1->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input2->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32); + input1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + input2->GetOutputSlot(0).SetTensorInfo(tensorInfo); + add->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + + // load it into the runtime + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // create structures for input & output - matching android nn test + std::vector<float> input1Data + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector<float> input2Data + { + 100.f, 200.f, 300.f, 400.f, 500.f, 600.f, 700.f, 800.f, 900.f, 1000.f, 1100.f, 1200.f + }; + std::vector<float> outputData(12); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, + {1,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // check the results + BOOST_TEST(outputData[0] == 101); + BOOST_TEST(outputData[1] == 202); + BOOST_TEST(outputData[2] == 303); + BOOST_TEST(outputData[3] == 404); + BOOST_TEST(outputData[4] == 505); + BOOST_TEST(outputData[5] == 606); + BOOST_TEST(outputData[6] == 707); + BOOST_TEST(outputData[7] == 808); + BOOST_TEST(outputData[8] == 909); + BOOST_TEST(outputData[9] == 1010); + BOOST_TEST(outputData[10] == 1111); + BOOST_TEST(outputData[11] == 1212); +} + +BOOST_AUTO_TEST_CASE(MultipleOutputs) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + // ReLu1 + ActivationDescriptor activation1Descriptor; + activation1Descriptor.m_Function = ActivationFunction::BoundedReLu; + activation1Descriptor.m_A = 1.f; + activation1Descriptor.m_B = -1.f; + IConnectableLayer* activation1 = net->AddActivationLayer(activation1Descriptor); + + // ReLu6 + ActivationDescriptor activation2Descriptor; + activation2Descriptor.m_Function = ActivationFunction::BoundedReLu; + activation2Descriptor.m_A = 6.0f; + IConnectableLayer* activation2 = net->AddActivationLayer(activation2Descriptor); + + // BoundedReLu(min=2, max=5) + ActivationDescriptor activation3Descriptor; + activation3Descriptor.m_Function = ActivationFunction::BoundedReLu; + activation3Descriptor.m_A = 5.0f; + activation3Descriptor.m_B = 2.0f; + IConnectableLayer* activation3 = net->AddActivationLayer(activation3Descriptor); + + IConnectableLayer* output1 = net->AddOutputLayer(0); + IConnectableLayer* output2 = net->AddOutputLayer(1); + IConnectableLayer* output3 = net->AddOutputLayer(2); + + 
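    // Note on the three activations above: BoundedReLu computes
    // output = min(m_A, max(m_B, input)), so activation1 (m_A=1, m_B=-1) clamps
    // to [-1, 1] ("ReLu1"), activation2 (m_A=6, m_B left at its default of 0)
    // clamps to [0, 6] ("ReLu6"), and activation3 (m_A=5, m_B=2) clamps to
    // [2, 5]. The three expected output vectors checked at the end of this test
    // encode exactly those ranges.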
input->GetOutputSlot(0).Connect(activation1->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(activation2->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(activation3->GetInputSlot(0)); + + activation1->GetOutputSlot(0).Connect(output1->GetInputSlot(0)); + activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0)); + activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0)); + + // set the tensors in the network + TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation2->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + + // load it into the runtime + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // create structures for input & output + const std::vector<float> inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f }; + + std::vector<float> output1Data(inputData.size()); + std::vector<float> output2Data(inputData.size()); + std::vector<float> output3Data(inputData.size()); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), output1Data.data())}, + {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), output2Data.data())}, + {2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())} + }; + + // do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // check the results + BOOST_TEST(output1Data == std::vector<float>({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1 + BOOST_TEST(output2Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6 + BOOST_TEST(output3Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5] +} + +#if ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork) +{ + using namespace armnn; + + // Create runtime in which test will run + // Note we don't allow falling back to CpuRef if an operation (excluding inputs, outputs, etc.) isn't supported + armnn::IRuntime::CreationOptions options(armnn::Compute::CpuAcc); + options.m_UseCpuRefAsFallback = false; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so LoadNetwork will fail. + NormalizationDescriptor descriptor; + IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor); + + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + + // Load it into the runtime. It should fail. 
+ NetworkId netId; + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Failure); +} +#endif // ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp new file mode 100644 index 0000000000..473cda1247 --- /dev/null +++ b/src/armnn/test/GraphTests.cpp @@ -0,0 +1,497 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "armnn/ArmNN.hpp" +#include "Graph.hpp" +#include "Layer.hpp" +#include "Layers.hpp" +#include "armnn/TypesUtils.hpp" +#include "armnn/Exceptions.hpp" + +#include "GraphUtils.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include <boost/cast.hpp> + +/// checks that first comes before second in the order +bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second) +{ + graph.Print(); + + const auto& order = graph.TopologicalSort(); + + auto firstPos = std::find(order.begin(), order.end(), first); + auto secondPos = std::find(firstPos, order.end(), second); + + return (secondPos != order.end()); +} + +static armnn::Layer* GetFirstLayerWithName(armnn::Graph& graph, const std::string& name) +{ + for (auto&& layer : graph) + { + if (layer->GetNameStr() == name) + { + return layer; + } + } + return nullptr; +} + +BOOST_AUTO_TEST_SUITE(Graph) + +BOOST_AUTO_TEST_CASE(ClassGraph) +{ + armnn::Graph graph; + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA")); + BOOST_TEST(GraphHasNamedLayer(graph, "layerA")); +} + +BOOST_AUTO_TEST_CASE(TopologicalSort) +{ + armnn::Graph graph; + + armnn::ActivationDescriptor activationDefaults; + + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerB")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::AdditionLayer>("layerC")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::OutputLayer>(0, "output")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerD")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerE")); + + armnn::Layer* const layerA = GetFirstLayerWithName(graph, "layerA"); + armnn::Layer* const layerB = GetFirstLayerWithName(graph, "layerB"); + armnn::Layer* const layerC = GetFirstLayerWithName(graph, "layerC"); + armnn::Layer* const layerO = GetFirstLayerWithName(graph, "output"); + armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE"); + armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD"); + + // simple graph which branches and rejoins + // A + // / \' + // D E + // \ | + // \ B + // \| + // C + layerA->GetOutputSlot(0).Connect(layerD->GetInputSlot(0)); + layerA->GetOutputSlot(0).Connect(layerE->GetInputSlot(0)); + layerE->GetOutputSlot(0).Connect(layerB->GetInputSlot(0)); + layerD->GetOutputSlot(0).Connect(layerC->GetInputSlot(0)); + layerB->GetOutputSlot(0).Connect(layerC->GetInputSlot(1)); + layerC->GetOutputSlot(0).Connect(layerO->GetInputSlot(0)); + + // check order is valid + BOOST_TEST(CheckOrder(graph, layerA, layerD)); + BOOST_TEST(CheckOrder(graph, layerA, layerE)); + BOOST_TEST(CheckOrder(graph, layerD, layerC)); + BOOST_TEST(CheckOrder(graph, layerE, layerB)); + BOOST_TEST(CheckOrder(graph, layerB, layerC)); +} + +BOOST_AUTO_TEST_CASE(InsertNewLayer) +{ + armnn::Graph graph; + armnn::TensorInfo tensorInfo({ 1, 1, 1, 1 }, armnn::DataType::Float32); + + 
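    // Graph::InsertNewLayer() creates the new layer on the connection that
    // currently feeds the given input slot: the existing producer is rewired to
    // feed the inserted layer, and the inserted layer then feeds the slot. The
    // CheckOrder() assertions below (for example C before E and E before D after
    // inserting "layerE" at layerD's second input) rely on exactly that rewiring.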
std::vector<armnn::Layer*> order; + + armnn::ActivationDescriptor activationDefaults; + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerB")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerC")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::AdditionLayer>("layerD")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::OutputLayer>(0, "output")); + + armnn::Layer* const layerA = GetFirstLayerWithName(graph, "layerA"); + armnn::Layer* const layerB = GetFirstLayerWithName(graph, "layerB"); + armnn::Layer* const layerC = GetFirstLayerWithName(graph, "layerC"); + armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD"); + armnn::Layer* const layerO = GetFirstLayerWithName(graph, "output"); + + // A + // / \' + // B C + // \ / + // D + layerA->GetOutputSlot(0).SetTensorInfo(tensorInfo); + layerB->GetOutputSlot(0).SetTensorInfo(tensorInfo); + layerC->GetOutputSlot(0).SetTensorInfo(tensorInfo); + layerD->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + layerA->GetOutputSlot(0).Connect(layerB->GetInputSlot(0)); + layerA->GetOutputSlot(0).Connect(layerC->GetInputSlot(0)); + layerB->GetOutputSlot(0).Connect(layerD->GetInputSlot(0)); + layerC->GetOutputSlot(0).Connect(layerD->GetInputSlot(1)); + layerD->GetOutputSlot(0).Connect(layerO->GetInputSlot(0)); + + // check order is valid + BOOST_TEST(CheckOrder(graph, layerA, layerB)); + BOOST_TEST(CheckOrder(graph, layerA, layerC)); + BOOST_TEST(CheckOrder(graph, layerB, layerD)); + BOOST_TEST(CheckOrder(graph, layerC, layerD)); + + // A + // / \' + // B C + // \ | + // \ E + // \| + // D + BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerD->GetInputSlot(1), + activationDefaults, + "layerE")); + + armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE"); + + // check order is valid + BOOST_TEST(CheckOrder(graph, layerA, layerB)); + BOOST_TEST(CheckOrder(graph, layerA, layerC)); + BOOST_TEST(CheckOrder(graph, layerB, layerD)); + BOOST_TEST(CheckOrder(graph, layerC, layerE)); + BOOST_TEST(CheckOrder(graph, layerE, layerD)); + + // A + // /| + // / F + // / | + // B C + // \ | + // \ E + // \| + // D + BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerC->GetInputSlot(0), + activationDefaults, + "layerF")); + + armnn::Layer* const layerF = GetFirstLayerWithName(graph, "layerF"); + + // check order is valid + BOOST_TEST(CheckOrder(graph, layerA, layerB)); + BOOST_TEST(CheckOrder(graph, layerA, layerF)); + BOOST_TEST(CheckOrder(graph, layerF, layerC)); + BOOST_TEST(CheckOrder(graph, layerB, layerD)); + BOOST_TEST(CheckOrder(graph, layerC, layerE)); + BOOST_TEST(CheckOrder(graph, layerE, layerD)); +} + +namespace +{ + using Edge = std::pair<const armnn::Layer*, const armnn::Layer*>; +} + +static std::vector<Edge> GetEdgeList(const armnn::Graph& graph) +{ + std::vector<Edge> edges; + + for (auto&& srcLayer: graph) + { + const unsigned int numOutputSlots = srcLayer->GetNumOutputSlots(); + for (unsigned int s = 0; s < numOutputSlots; ++s) + { + const armnn::IOutputSlot& outputSlot = srcLayer->GetOutputSlot(s); + const unsigned int numConnections = outputSlot.GetNumConnections(); + for (unsigned int c = 0; c < numConnections; ++c) + { + auto inputSlot = boost::polymorphic_downcast<const armnn::InputSlot*>(outputSlot.GetConnection(c)); + edges.emplace_back(srcLayer, &inputSlot->GetOwningLayer()); + } + } + } + + return edges; +} + +static void 
TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armnn::Graph& origGraph) +{ + std::vector<Edge> origEdges = GetEdgeList(origGraph); + std::vector<Edge> newEdges = GetEdgeList(graph); + + // Adding copy layers should not produce any duplicate edges + { + std::vector<Edge> sortedNewEdges = newEdges; + std::sort(sortedNewEdges.begin(), sortedNewEdges.end()); + + auto last = std::unique(sortedNewEdges.begin(), sortedNewEdges.end()); + BOOST_CHECK_MESSAGE(last == sortedNewEdges.end(), "New graph contains duplicate edges!"); + } + + // Each new edge must be tested + while (!newEdges.empty()) + { + const Edge edge = std::move(newEdges.back()); + newEdges.pop_back(); + + // Edge present in the original graph? + int originalEdge = -1; + for (unsigned int i = 0; i < origEdges.size(); i++) + { + const Edge& origEdge = origEdges[i]; + if (origEdge.first->GetNameStr() == edge.first->GetNameStr() && + origEdge.second->GetNameStr() == edge.second->GetNameStr()) + { + originalEdge = boost::numeric_cast<int>(i); + } + } + + if (originalEdge != -1) + { + // Each vertex should correspond to a layer. + const armnn::Layer* srcLayer = edge.first; + const armnn::Layer* dstLayer = edge.second; + BOOST_TEST(srcLayer); + BOOST_TEST(dstLayer); + + // Both layers must have the same compute device. + if (srcLayer && dstLayer) + { + BOOST_TEST((srcLayer->GetComputeDevice() == dstLayer->GetComputeDevice())); + } + + // Mark edge in original graph as observed (by deleting it) + origEdges.erase(origEdges.begin() + originalEdge); + } + else + { + // Edge did not exist in the original graph. + // It must then be an edge connecting a layer and a copy layer. + const armnn::Layer* srcLayer = edge.first; + const armnn::Layer* dstLayer = edge.second; + + if (srcLayer == nullptr || dstLayer == nullptr) + { + BOOST_ERROR("At least one of the two ends of a new edge (" << edge.first << ", " << edge.second << ") " + "introduced after adding copy layers to a graph correspond is not known to the graph"); + continue; + } + + // One and only one of the two layers referenced by the edge should be present in the original graph. + const bool srcLayerInOrigGraph = GraphHasNamedLayer(origGraph, edge.first->GetNameStr()); + const bool dstLayerInOrigGraph = GraphHasNamedLayer(origGraph, edge.second->GetNameStr()); + + if (srcLayerInOrigGraph == dstLayerInOrigGraph) + { + BOOST_ERROR("A new edge (" + << edge.first->GetName() + << ", " + << edge.second->GetName() + << ") introduced after adding copy " + "layers to a graph is invalid. One of the ends should be present in the original " + "graph and the other should not, but " + << (srcLayerInOrigGraph ? "both are" : "none are")); + continue; + } + + const armnn::Layer* copyLayer = srcLayerInOrigGraph ? edge.second : edge.first; + const armnn::Layer* nonCopyLayer = srcLayerInOrigGraph ? srcLayer : dstLayer; + + // Find all edges connecting the copy layer to other layers + std::vector<Edge> adjEdges; + auto it = newEdges.begin(); + while (it != newEdges.end()) + { + Edge& newEdge = *it; + if (copyLayer == (srcLayerInOrigGraph ? 
newEdge.first : newEdge.second)) + { + adjEdges.push_back(newEdge); + + // Since the adjacent edge is immediately tested below, no need to consider it afterwards + it = newEdges.erase(it); + } + else + { + it++; + } + } + + if (adjEdges.empty()) + { + BOOST_ERROR("An edge connecting a layer and a copy layer exists, (" << edge.first << ", " << + edge.second << "), but no other edges connecting the copy layer '" << copyLayer->GetName() + << "' to other layers could be found"); + continue; + } + + // Test adjacent edges now + for (const Edge& adjEdge : adjEdges) + { + // The adjacent edge must connect the copy layer to another layer + const armnn::Layer* adjLayer = srcLayerInOrigGraph ? adjEdge.second : adjEdge.first; + + if (!adjLayer) + { + BOOST_ERROR("An edge (" << adjEdge.first << ", " << adjEdge.second <<") is adjacent to an edge " + "connecting a layer and a copy layer, (" << edge.first << ", " << edge.second << "), " + "but the non-copy layer in the former, '" << adjLayer->GetName() << "' does not " + "correspond to a layer"); + continue; + } + + // Both layers must have different compute devices + BOOST_TEST((nonCopyLayer->GetComputeDevice() != adjLayer->GetComputeDevice())); + + // There must exist an edge connecting both layers directly in the original graph + { + const armnn::Layer* origEdgeN1 = srcLayerInOrigGraph ? nonCopyLayer : adjLayer; + const armnn::Layer* origEdgeN2 = srcLayerInOrigGraph ? adjLayer : nonCopyLayer; + auto origEdgeIter = std::find(origEdges.begin(), origEdges.end(), + Edge(origEdgeN1, origEdgeN2)); + + if (origEdgeIter != origEdges.end()) + { + origEdges.erase(origEdgeIter); + } + else + { + BOOST_ERROR("An edge (" << adjEdge.first << ", " << adjEdge.second << ") is adjacent to an " + "edge connecting a layer and a copy layer, (" << edge.first << ", " << edge.second << + "), but there is no edge connecting the layers in the original graph"); + } + } + } + } + } + + BOOST_TEST(origEdges.empty(), "Not all of the edges in the original graph correspond to paths in the new graph"); +} + +struct CopyLayersFixture +{ + CopyLayersFixture() + { + using namespace armnn; + using namespace std; + + Layer* const inputLayer = AddLayer<InputLayer>(0, "input"); + inputLayer->SetComputeDevice(Compute::CpuRef); + + Convolution2dDescriptor convolutionDefaults; + Layer* const convLayer1 = AddLayer<Convolution2dLayer>(convolutionDefaults, "conv1"); + convLayer1->SetComputeDevice(Compute::CpuRef); + + inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0)); + + Layer* const convLayer2 = AddLayer<Convolution2dLayer>(convolutionDefaults, "conv2"); + convLayer2->SetComputeDevice(Compute::CpuRef); + + convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0)); + + armnn::OriginsDescriptor mergerDefaults(2); + Layer* const mergerLayer = AddLayer<MergerLayer>(mergerDefaults, "merger"); + mergerLayer->SetComputeDevice(armnn::Compute::CpuRef); + + convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + + armnn::ActivationDescriptor activationDefaults; + Layer* const actLayer = AddLayer<ActivationLayer>(activationDefaults, "act"); + actLayer->SetComputeDevice(armnn::Compute::CpuRef); + + mergerLayer->GetOutputSlot(0).Connect(actLayer->GetInputSlot(0)); + + armnn::SoftmaxDescriptor softmaxDefaults; + Layer* const softmaxLayer = AddLayer<SoftmaxLayer>(softmaxDefaults, "softmax"); + softmaxLayer->SetComputeDevice(armnn::Compute::CpuRef); + + 
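    // Topology built by this fixture, with every layer assigned to CpuRef:
    // input -> conv1 -> conv2, with conv1 and conv2 also feeding the two merger
    // inputs, then merger -> act -> softmax -> output. m_TensorDesc is left
    // default-constructed; the copy-layer tests only inspect graph structure and
    // compute devices, not tensor shapes.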
actLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0)); + + Layer* const outputLayer = AddLayer<OutputLayer>(0, "output"); + outputLayer->SetComputeDevice(armnn::Compute::CpuRef); + + softmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + } + + armnn::TensorInfo m_TensorDesc; + armnn::Graph m_Graph; + +private: + + template <typename LayerType, typename... Args> + LayerType* AddLayer(Args&&... args) + { + LayerType* const layer = m_Graph.AddLayer<LayerType>(std::forward<Args>(args)...); + + for (auto slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot) + { + slot->SetTensorInfo(m_TensorDesc); + } + + return layer; + }; +}; + +BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture) +{ + const armnn::Graph origGraph(m_Graph); + m_Graph.AddCopyLayers(); + + TestGraphAfterAddingCopyLayers(m_Graph, origGraph); +} + +BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture) +{ + m_Graph.AddCopyLayers(); + + // Calling AddCopyLayers() several times should not change the connections + const std::vector<Edge> edges = GetEdgeList(m_Graph); + for (int i = 0; i < 4; ++i) + { + m_Graph.AddCopyLayers(); + const std::vector<Edge> otherEdges = GetEdgeList(m_Graph); + BOOST_TEST((edges == otherEdges)); + } +} + +BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames) +{ + armnn::Graph graph; + + armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "input"); + inputLayer->SetComputeDevice(armnn::Compute::CpuRef); + + armnn::ViewsDescriptor splitterDesc(2); + armnn::SplitterLayer* const splitterLayer = graph.AddLayer<armnn::SplitterLayer>(splitterDesc, "splitter"); + splitterLayer->SetComputeDevice(armnn::Compute::GpuAcc); + + armnn::AdditionLayer* const additionLayer = graph.AddLayer<armnn::AdditionLayer>("addition"); + additionLayer->SetComputeDevice(armnn::Compute::CpuRef); + + armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output"); + outputLayer->SetComputeDevice(armnn::Compute::CpuRef); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + graph.AddCopyLayers(); + + std::vector<Edge> edges = GetEdgeList(graph); + BOOST_CHECK(edges.size() == 7u); + std::sort(edges.begin(), edges.end()); + auto last = std::unique(edges.begin(), edges.end()); + BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()"); +} + +BOOST_AUTO_TEST_CASE(DuplicateLayerNames) +{ + armnn::Graph graph; + + armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "layer"); + inputLayer->SetComputeDevice(armnn::Compute::CpuRef); + + armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "layer"); + outputLayer->SetComputeDevice(armnn::Compute::CpuRef); + + inputLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + auto it = graph.TopologicalSort().begin(); + BOOST_TEST(((*it)->GetType() == armnn::LayerType::Input)); + BOOST_TEST(((*std::next(it))->GetType() == armnn::LayerType::Output)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/GraphUtils.hpp b/src/armnn/test/GraphUtils.hpp new file mode 100644 index 0000000000..3ff7d2f67b --- /dev/null +++ b/src/armnn/test/GraphUtils.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Graph.hpp" +#include <string> + +namespace +{ + +bool GraphHasNamedLayer(const armnn::Graph& graph, const std::string& name) +{ + for (auto&& layer : graph) + { + if (layer->GetName() == name) + { + return true; + } + } + return false; +} +}
\ No newline at end of file diff --git a/src/armnn/test/Network_test.cpp b/src/armnn/test/Network_test.cpp new file mode 100644 index 0000000000..523d47b169 --- /dev/null +++ b/src/armnn/test/Network_test.cpp @@ -0,0 +1,425 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "armnn/ArmNN.hpp" +#include "Network.hpp" +#include "Graph.hpp" +#include "backends/RefWorkloadFactory.hpp" + +#include "GraphUtils.hpp" + +namespace +{ + +bool AreAllLayerInputSlotsConnected(const armnn::IConnectableLayer& layer) +{ + bool allConnected = true; + for (unsigned int i = 0; i < layer.GetNumInputSlots(); ++i) + { + const bool inputConnected = layer.GetInputSlot(i).GetConnection() != nullptr; + allConnected &= inputConnected; + } + return allConnected; +} + +} + +BOOST_AUTO_TEST_SUITE(Network) + +BOOST_AUTO_TEST_CASE(NetworkBasic) +{ + armnn::Network net; + BOOST_TEST(net.PrintGraph() == armnn::Status::Success); +} + +BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForINetwork) +{ + armnn::Network net; + armnn::INetwork& inet = net; + inet.AddInputLayer(0); + inet.AddAdditionLayer(); + inet.AddActivationLayer(armnn::ActivationDescriptor()); + inet.AddOutputLayer(0); +} + +BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForNetwork) +{ + armnn::Network net; + net.AddInputLayer(0); + net.AddAdditionLayer(); + net.AddActivationLayer(armnn::ActivationDescriptor()); + net.AddOutputLayer(0); +} + +BOOST_AUTO_TEST_CASE(NetworkModification) +{ + armnn::Network net; + + armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(inputLayer); + + unsigned int dims[] = { 10,1,1,1 }; + std::vector<float> convWeightsData(10); + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), convWeightsData); + + armnn::Convolution2dDescriptor convDesc2d; + armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, "conv layer"); + BOOST_TEST(convLayer); + + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + + armnn::FullyConnectedDescriptor fullyConnectedDesc; + armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc, + weights, + "fully connected"); + BOOST_TEST(fullyConnectedLayer); + + convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); + + armnn::Pooling2dDescriptor pooling2dDesc; + armnn::IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(pooling2dDesc, "pooling2d"); + BOOST_TEST(poolingLayer); + + fullyConnectedLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0)); + + armnn::ActivationDescriptor activationDesc; + armnn::IConnectableLayer* const activationLayer = net.AddActivationLayer(activationDesc, "activation"); + BOOST_TEST(activationLayer); + + poolingLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0)); + + armnn::NormalizationDescriptor normalizationDesc; + armnn::IConnectableLayer* const normalizationLayer = net.AddNormalizationLayer(normalizationDesc, "normalization"); + BOOST_TEST(normalizationLayer); + + activationLayer->GetOutputSlot(0).Connect(normalizationLayer->GetInputSlot(0)); + + armnn::SoftmaxDescriptor softmaxDesc; + armnn::IConnectableLayer* const softmaxLayer = net.AddSoftmaxLayer(softmaxDesc, "softmax"); + BOOST_TEST(softmaxLayer); + + normalizationLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0)); + + armnn::BatchNormalizationDescriptor batchNormDesc; + + 
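    // The four ConstTensor arguments passed to AddBatchNormalizationLayer()
    // below are the mean, variance, beta and gamma parameters. This test only
    // exercises graph construction, so a single one-element dummy tensor
    // (deliberately named invalidTensor) is reused for all four.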
armnn::TensorInfo tensorInfo({ 1 }, armnn::DataType::Float32); + std::vector<float> data(tensorInfo.GetNumBytes() / sizeof(float)); + armnn::ConstTensor invalidTensor(tensorInfo, data); + + armnn::IConnectableLayer* const batchNormalizationLayer = net.AddBatchNormalizationLayer(batchNormDesc, + invalidTensor, + invalidTensor, + invalidTensor, + invalidTensor, + "batch norm"); + BOOST_TEST(batchNormalizationLayer); + + softmaxLayer->GetOutputSlot(0).Connect(batchNormalizationLayer->GetInputSlot(0)); + + armnn::IConnectableLayer* const additionLayer = net.AddAdditionLayer("addition"); + BOOST_TEST(additionLayer); + + batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + + armnn::IConnectableLayer* const multiplicationLayer = net.AddMultiplicationLayer("multiplication"); + BOOST_TEST(multiplicationLayer); + + additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(0)); + additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(1)); + + armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(outputLayer); + + multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + //Test that all layers are present in the graph + BOOST_TEST(net.GetGraph().GetNumLayers() == 11); + + //Test that the vertices exist and have correct names + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "input layer")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "conv layer")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "fully connected")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "pooling2d")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "activation")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "normalization")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "softmax")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "batch norm")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "addition")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "multiplication")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "output layer")); + + auto checkOneOutputToOneInputConnection = [] + (const armnn::IConnectableLayer* const srcLayer, + const armnn::IConnectableLayer* const tgtLayer, + int expectedSrcNumInputs = 1, + int expectedDstNumOutputs = 1) + { + BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs); + BOOST_TEST(srcLayer->GetNumOutputSlots() == 1); + BOOST_TEST(tgtLayer->GetNumInputSlots() == 1); + BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); + + BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 1); + BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0)); + BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection()); + }; + auto checkOneOutputToTwoInputsConnections = [] + (const armnn::IConnectableLayer* const srcLayer, + const armnn::IConnectableLayer* const tgtLayer, + int expectedSrcNumInputs, + int expectedDstNumOutputs = 1) + { + BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs); + BOOST_TEST(srcLayer->GetNumOutputSlots() == 1); + BOOST_TEST(tgtLayer->GetNumInputSlots() == 2); + BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); + + BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 2); + for (unsigned int i = 0; i < srcLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(i) == 
&tgtLayer->GetInputSlot(i)); + BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection()); + } + }; + + BOOST_TEST(AreAllLayerInputSlotsConnected(*convLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*fullyConnectedLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*poolingLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*activationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*normalizationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*softmaxLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*batchNormalizationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*additionLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*multiplicationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*outputLayer)); + + // Check connectivity + checkOneOutputToOneInputConnection(inputLayer, convLayer, 0); + checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer); + checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer); + checkOneOutputToOneInputConnection(poolingLayer, activationLayer); + checkOneOutputToOneInputConnection(activationLayer, normalizationLayer); + checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer); + checkOneOutputToOneInputConnection(softmaxLayer, batchNormalizationLayer); + checkOneOutputToTwoInputsConnections(batchNormalizationLayer, additionLayer, 1); + checkOneOutputToTwoInputsConnections(additionLayer, multiplicationLayer, 2); + checkOneOutputToOneInputConnection(multiplicationLayer, outputLayer, 2, 0); +} + +BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMerger) +{ + armnn::Network net; + + // Add an input layer and an input tensor descriptor. + armnn::IConnectableLayer* inputLayer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(inputLayer); + + // Add a splitter layer + armnn::ViewsDescriptor splitterDesc(2,4); + + armnn::IConnectableLayer* splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); + BOOST_TEST(splitterLayer); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + + // Add a softmax layer 1 + armnn::SoftmaxDescriptor softmaxDescriptor; + armnn::IConnectableLayer* softmaxLayer1 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); + BOOST_TEST(softmaxLayer1); + + splitterLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0)); + + // Add a softmax layer 2 + armnn::IConnectableLayer* softmaxLayer2 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); + BOOST_TEST(softmaxLayer2); + + splitterLayer->GetOutputSlot(1).Connect(softmaxLayer2->GetInputSlot(0)); + + // Add a merger layer + armnn::OriginsDescriptor mergerDesc(2, 4); + + armnn::IConnectableLayer* mergerLayer = net.AddMergerLayer(mergerDesc, "merger layer"); + BOOST_TEST(mergerLayer); + + softmaxLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + softmaxLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + + // Add an output layer + armnn::IConnectableLayer* outputLayer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(outputLayer); + + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + BOOST_TEST(splitterLayer->GetNumOutputSlots() == 2); + BOOST_TEST(splitterLayer->GetOutputSlot(0).GetConnection(0) == &softmaxLayer1->GetInputSlot(0)); + BOOST_TEST(&splitterLayer->GetOutputSlot(0) == softmaxLayer1->GetInputSlot(0).GetConnection()); + BOOST_TEST(splitterLayer->GetOutputSlot(1).GetConnection(0) == &softmaxLayer2->GetInputSlot(0)); + BOOST_TEST(&splitterLayer->GetOutputSlot(1) == 
softmaxLayer2->GetInputSlot(0).GetConnection()); + + BOOST_TEST(mergerLayer->GetNumInputSlots() == 2); + BOOST_TEST(softmaxLayer1->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(0)); + BOOST_TEST(&softmaxLayer1->GetOutputSlot(0) == mergerLayer->GetInputSlot(0).GetConnection()); + BOOST_TEST(softmaxLayer2->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(1)); + BOOST_TEST(&softmaxLayer2->GetOutputSlot(0) == mergerLayer->GetInputSlot(1).GetConnection()); +} + +BOOST_AUTO_TEST_CASE(NetworkModification_SplitterAddition) +{ + armnn::Network net; + + // Add an input layer and an input tensor descriptor. + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(layer); + + // Add a splitter layer + armnn::ViewsDescriptor splitterDesc(2,4); + + armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); + BOOST_TEST(splitterLayer); + + layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + + // Add a softmax layer 1 + armnn::SoftmaxDescriptor softmaxDescriptor; + armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); + BOOST_TEST(softmax1Layer); + + splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0)); + + // Add a softmax layer 2 + armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); + BOOST_TEST(softmax2Layer); + + splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0)); + + // Add addition layer + layer = net.AddAdditionLayer("add layer"); + BOOST_TEST(layer); + + softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + // Add an output layer + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddOutputLayer(0, "output layer"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + BOOST_TEST(layer); +} + +BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication) +{ + armnn::Network net; + + // Add an input layer and an input tensor descriptor. 
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(layer); + + // Add a splitter layer + armnn::ViewsDescriptor splitterDesc(2,4); + armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); + BOOST_TEST(splitterLayer); + + layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + + // Add a softmax layer 1 + armnn::SoftmaxDescriptor softmaxDescriptor; + armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); + BOOST_TEST(softmax1Layer); + + splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0)); + + // Add a softmax layer 2 + armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); + BOOST_TEST(softmax2Layer); + + splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0)); + + // Add multiplication layer + layer = net.AddMultiplicationLayer("multiplication layer"); + BOOST_TEST(layer); + + softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + // Add an output layer + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(layer); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); +} + +BOOST_AUTO_TEST_CASE(ValidateWorkloads) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::DeviceSpec spec; + spec.DefaultComputeDevice = armnn::Compute::CpuRef; + + armnn::IOptimizedNetworkPtr optNet = Optimize(net, spec); + static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph().AllocateDynamicBuffers(); + + // validate workloads + armnn::RefWorkloadFactory fact; + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + BOOST_CHECK_NO_THROW( + layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact)); + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp new file mode 100644 index 0000000000..117df5e55a --- /dev/null +++ b/src/armnn/test/RuntimeTests.cpp @@ -0,0 +1,190 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "armnn/TypesUtils.hpp" + +#include "armnn/IRuntime.hpp" +#include "armnn/INetwork.hpp" +#include "armnn/Descriptors.hpp" +#include "Runtime.hpp" + +#ifdef WITH_VALGRIND +#include "valgrind/memcheck.h" +#endif + +#include <boost/core/ignore_unused.hpp> + +namespace armnn +{ + +void RuntimeLoadedNetworksReserve(armnn::Runtime* runtime) +{ + runtime->m_LoadedNetworks.reserve(1); +} + +} + +BOOST_AUTO_TEST_SUITE(Runtime) + +BOOST_AUTO_TEST_CASE(RuntimeUnloadNetwork) +{ + // build 2 mock-networks and load them into the runtime + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + + // mock network 1 + armnn::NetworkId networkIdentifier1 = 1; + armnn::INetworkPtr mockNetwork1(armnn::INetwork::Create()); + mockNetwork1->AddInputLayer(0, "test layer"); + runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, runtime->GetDeviceSpec())); + + // mock network 2 + armnn::NetworkId networkIdentifier2 = 2; + armnn::INetworkPtr mockNetwork2(armnn::INetwork::Create()); + mockNetwork2->AddInputLayer(0, "test layer"); + runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, runtime->GetDeviceSpec())); + + // unload one by its networkID + BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Success); + + BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Failure); +} + +#if defined(ARMCOMPUTECL_ENABLED) && defined(WITH_VALGRIND) +BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage) +{ + // From documentation: + + // This means that no pointer to the block can be found. The block is classified as "lost", + // because the programmer could not possibly have freed it at program exit, since no pointer to it exists. + unsigned long leakedBefore = 0; + unsigned long leakedAfter = 0; + + // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at, + // the programmer could, at least in principle, have freed it before program exit. 
+ // We want to test this in case memory is not freed as early as it could have been + unsigned long reachableBefore = 0; + unsigned long reachableAfter = 0; + + // needed as out params but we don't test them + unsigned long dubious = 0; + unsigned long suppressed = 0; + + // ensure that runtime is large enough before checking for memory leaks + // otherwise when loading the network it will automatically reserve memory that won't be released until destruction + armnn::NetworkId networkIdentifier; + armnn::Runtime runtime(armnn::Compute::GpuAcc); + armnn::RuntimeLoadedNetworksReserve(&runtime); + + // check for leaks before we load the network and record them so that we can see the delta after unloading + VALGRIND_DO_QUICK_LEAK_CHECK; + VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed); + + // build a mock-network and load it into the runtime + { + armnn::TensorInfo inputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32); + armnn::TensorInfo outputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32); + + armnn::INetworkPtr mockNetwork(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = mockNetwork->AddInputLayer(0, "input"); + armnn::IConnectableLayer* layer = mockNetwork->AddActivationLayer(armnn::ActivationDescriptor(), "test"); + armnn::IConnectableLayer* output = mockNetwork->AddOutputLayer(0, "output"); + + input->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // optimize the network + armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, runtime.GetDeviceSpec()); + + runtime.LoadNetwork(networkIdentifier, std::move(optNet)); + } + + runtime.UnloadNetwork(networkIdentifier); + + VALGRIND_DO_ADDED_LEAK_CHECK; + VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed); + + // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass + BOOST_TEST(leakedBefore == leakedAfter); + + // Add resonable threshold after and before running valgrind with the ACL clear cache function. + BOOST_TEST(reachableAfter - reachableBefore < 30000); + + // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters + // so they are assigned to, but still considered unused, causing a warning + boost::ignore_unused(dubious); + boost::ignore_unused(suppressed); +} +#endif + +#ifdef WITH_VALGRIND +// run with the following command to get all the amazing output (in the devenv/build folder) :) +// valgrind --leak-check=full --show-leak-kinds=all --log-file=Valgrind_Memcheck_Leak_Report.txt armnn/test/UnitTests +BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) +{ + // From documentation: + + // This means that no pointer to the block can be found. The block is classified as "lost", + // because the programmer could not possibly have freed it at program exit, since no pointer to it exists. + unsigned long leakedBefore = 0; + unsigned long leakedAfter = 0; + + // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at, + // the programmer could, at least in principle, have freed it before program exit. 
+ // We want to test this in case memory is not freed as early as it could have been + unsigned long reachableBefore = 0; + unsigned long reachableAfter = 0; + + // needed as out params but we don't test them + unsigned long dubious = 0; + unsigned long suppressed = 0; + + armnn::NetworkId networkIdentifier1 = 1; + + // ensure that runtime is large enough before checking for memory leaks + // otherwise when loading the network it will automatically reserve memory that won't be released until destruction + armnn::Runtime runtime(armnn::Compute::CpuRef); + armnn::RuntimeLoadedNetworksReserve(&runtime); + + // check for leaks before we load the network and record them so that we can see the delta after unloading + VALGRIND_DO_QUICK_LEAK_CHECK; + VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed); + + // build a mock-network and load it into the runtime + { + unsigned int inputShape[] = {1, 7, 1, 1}; + armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32); + + std::unique_ptr<armnn::Network> mockNetwork1 = std::make_unique<armnn::Network>(); + mockNetwork1->AddInputLayer(0, "test layer"); + + armnn::DeviceSpec device; + device.DefaultComputeDevice = armnn::Compute::CpuRef; + + runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, device)); + } + + runtime.UnloadNetwork(networkIdentifier1); + + VALGRIND_DO_ADDED_LEAK_CHECK; + VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed); + + // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass + BOOST_TEST(leakedBefore == leakedAfter); + BOOST_TEST(reachableBefore == reachableAfter); + + // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters + // so they are assigned to, but still considered unused, causing a warning + boost::ignore_unused(dubious); + boost::ignore_unused(suppressed); +} +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/TensorHelpers.hpp b/src/armnn/test/TensorHelpers.hpp new file mode 100644 index 0000000000..e4ff899a4e --- /dev/null +++ b/src/armnn/test/TensorHelpers.hpp @@ -0,0 +1,201 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/TensorFwd.hpp> +#include <boost/test/unit_test.hpp> +#include <boost/multi_array.hpp> +#include <vector> +#include <array> + +#include <boost/assert.hpp> +#include <boost/test/tools/floating_point_comparison.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/numeric/conversion/cast.hpp> + +#include "armnn/Tensor.hpp" + +#include "backends/test/QuantizeHelper.hpp" + +#include <cmath> + +constexpr float g_FloatCloseToZeroTolerance = 1.0e-7f; + +template<typename T, bool isQuantized = true> +struct SelectiveComparer +{ + static bool Compare(T a, T b) + { + return (std::max(a, b) - std::min(a, b)) <= 1; + } + +}; + +template<typename T> +struct SelectiveComparer<T, false> +{ + static bool Compare(T a, T b) + { + // if a or b is zero, percent_tolerance does an exact match, so compare to a small, constant tolerance instead + if (a == 0.0f || b == 0.0f) + { + return std::abs(a - b) <= g_FloatCloseToZeroTolerance; + } + // For unquantized floats we use a tolerance of 1%. 
+ boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f)); + return comparer(a, b); + } +}; + +template<typename T> +bool SelectiveCompare(T a, T b) +{ + return SelectiveComparer<T, armnn::IsQuantizedType<T>()>::Compare(a, b); +}; + + + +template <typename T, std::size_t n> +boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n>& a, + const boost::multi_array<T, n>& b) +{ + // check they are same shape + for (unsigned int i=0; i<n; i++) + { + if (a.shape()[i] != b.shape()[i]) + { + boost::test_tools::predicate_result res(false); + res.message() << "Different shapes [" + << a.shape()[i] + << "!=" + << b.shape()[i] + << "]"; + return res; + } + } + + // now compare element-wise + + // fun iteration over n dimensions + std::array<unsigned int, n> indices; + for (unsigned int i = 0; i < n; i++) + { + indices[i] = 0; + } + + std::stringstream errorString; + int numFailedElements = 0; + constexpr int maxReportedDifferences = 3; + + while (true) + { + bool comparison = SelectiveCompare(a(indices), b(indices)); + if (!comparison) + { + ++numFailedElements; + + if (numFailedElements <= maxReportedDifferences) + { + if (numFailedElements >= 2) + { + errorString << ", "; + } + errorString << "["; + for (unsigned int i = 0; i < n; ++i) + { + errorString << indices[i]; + if (i != n - 1) + { + errorString << ","; + } + } + errorString << "]"; + + errorString << " (" << +a(indices) << " != " << +b(indices) << ")"; + } + } + + ++indices[n - 1]; + for (unsigned int i=n-1; i>0; i--) + { + if (indices[i] == a.shape()[i]) + { + indices[i] = 0; + ++indices[i - 1]; + } + } + + if (indices[0] == a.shape()[0]) + { + break; + } + } + + boost::test_tools::predicate_result comparisonResult(true); + if (numFailedElements > 0) + { + comparisonResult = false; + comparisonResult.message() << numFailedElements << " different values at: "; + if (numFailedElements > maxReportedDifferences) + { + errorString << ", ... (and " << (numFailedElements - maxReportedDifferences) << " other differences)"; + } + comparisonResult.message() << errorString.str(); + } + + return comparisonResult; +} + + +// Creates a boost::multi_array with shape defined by the given TensorInfo. +template <typename T, std::size_t n> +boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo) +{ + std::array<unsigned int, n> shape; + + for (unsigned int i = 0; i < n; i++) + { + shape[i] = tensorInfo.GetShape()[i]; + } + + return boost::multi_array<T, n>(shape); +} + +// Creates a boost::multi_array with shape defined by the given TensorInfo and contents defined by the given vector. 
+template <typename T, std::size_t n> +boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo, const std::vector<T>& flat) +{ + BOOST_ASSERT_MSG(flat.size() == tensorInfo.GetNumElements(), "Wrong number of components supplied to tensor"); + + std::array<unsigned int, n> shape; + + for (unsigned int i = 0; i < n; i++) + { + shape[i] = tensorInfo.GetShape()[i]; + } + + boost::const_multi_array_ref<T, n> arrayRef(&flat[0], shape); + return boost::multi_array<T, n>(arrayRef); +} + +template <typename T, std::size_t n> +boost::multi_array<T, n> MakeRandomTensor(const armnn::TensorInfo& tensorInfo, + unsigned int seed, + float min = -10.0f, + float max = 10.0f) +{ + boost::random::mt19937 gen(seed); + boost::random::uniform_real_distribution<float> dist(min, max); + + std::vector<float> init(tensorInfo.GetNumElements()); + for (unsigned int i = 0; i < init.size(); i++) + { + init[i] = dist(gen); + } + float qScale = tensorInfo.GetQuantizationScale(); + int32_t qOffset = tensorInfo.GetQuantizationOffset(); + return MakeTensor<T, n>(tensorInfo, QuantizedVector<T>(qScale, qOffset, init)); +} diff --git a/src/armnn/test/TensorTest.cpp b/src/armnn/test/TensorTest.cpp new file mode 100644 index 0000000000..2bb37f4fb8 --- /dev/null +++ b/src/armnn/test/TensorTest.cpp @@ -0,0 +1,146 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> +#include <armnn/Tensor.hpp> + +namespace armnn +{ + +// Add unit test framework for interpreting TensorInfo type +std::ostream& boost_test_print_type(std::ostream& ostr, const TensorInfo& right) +{ + ostr << "TensorInfo[ " + << right.GetNumDimensions() << "," + << right.GetShape()[0] << "," + << right.GetShape()[1] << "," + << right.GetShape()[2] << "," + << right.GetShape()[3] + << " ]" << std::endl; + return ostr; +} + +std::ostream& boost_test_print_type(std::ostream& ostr, const TensorShape& shape) +{ + ostr << "TensorShape[ " + << shape.GetNumDimensions() << "," + << shape[0] << "," + << shape[1] << "," + << shape[2] << "," + << shape[3] + << " ]" << std::endl; + return ostr; +} + +} //namespace armnn +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Tensor) + +struct TensorInfoFixture +{ + TensorInfoFixture() + { + unsigned int sizes[] = {6,7,8,9}; + m_TensorInfo = TensorInfo(4, sizes, DataType::Float32); + } + ~TensorInfoFixture() {}; + + TensorInfo m_TensorInfo; +}; + +BOOST_FIXTURE_TEST_CASE(ConstructShapeUsingListInitialization, TensorInfoFixture) +{ + TensorShape listInitializedShape{ 6, 7, 8, 9 }; + BOOST_TEST(listInitializedShape == m_TensorInfo.GetShape()); +} + +BOOST_FIXTURE_TEST_CASE(ConstructTensorInfo, TensorInfoFixture) +{ + BOOST_TEST(m_TensorInfo.GetNumDimensions() == 4); + BOOST_TEST(m_TensorInfo.GetShape()[0] == 6); // <= Outer most + BOOST_TEST(m_TensorInfo.GetShape()[1] == 7); + BOOST_TEST(m_TensorInfo.GetShape()[2] == 8); + BOOST_TEST(m_TensorInfo.GetShape()[3] == 9); // <= Inner most +} + +BOOST_FIXTURE_TEST_CASE(CopyConstructTensorInfo, TensorInfoFixture) +{ + TensorInfo copyConstructed(m_TensorInfo); + BOOST_TEST(copyConstructed.GetNumDimensions() == 4); + BOOST_TEST(copyConstructed.GetShape()[0] == 6); + BOOST_TEST(copyConstructed.GetShape()[1] == 7); + BOOST_TEST(copyConstructed.GetShape()[2] == 8); + BOOST_TEST(copyConstructed.GetShape()[3] == 9); +} + +BOOST_FIXTURE_TEST_CASE(TensorInfoEquality, TensorInfoFixture) +{ + TensorInfo copyConstructed(m_TensorInfo); + BOOST_TEST(copyConstructed == m_TensorInfo); 
+} + +BOOST_FIXTURE_TEST_CASE(TensorInfoInequality, TensorInfoFixture) +{ + TensorInfo other; + unsigned int sizes[] = {2,3,4,5}; + other = TensorInfo(4, sizes, DataType::Float32); + + BOOST_TEST(other != m_TensorInfo); +} + +BOOST_FIXTURE_TEST_CASE(TensorInfoAssignmentOperator, TensorInfoFixture) +{ + TensorInfo copy; + copy = m_TensorInfo; + BOOST_TEST(copy == m_TensorInfo); +} + +void CheckTensor(const ConstTensor& t) +{ + t.GetInfo(); +} + +BOOST_AUTO_TEST_CASE(TensorVsConstTensor) +{ + int mutableDatum = 2; + const int immutableDatum = 3; + + armnn::Tensor uninitializedTensor; + armnn::ConstTensor uninitializedTensor2; + + uninitializedTensor2 = uninitializedTensor; + + armnn::Tensor t(TensorInfo(), &mutableDatum); + armnn::ConstTensor ct(TensorInfo(), &immutableDatum); + + // Check that both Tensor and ConstTensor can be passed as a ConstTensor + CheckTensor(t); + CheckTensor(ct); +} + +BOOST_AUTO_TEST_CASE(ModifyTensorInfo) +{ + TensorInfo info; + info.SetShape({ 5, 6, 7, 8 }); + BOOST_TEST((info.GetShape() == TensorShape({ 5, 6, 7, 8 }))); + info.SetDataType(DataType::QuantisedAsymm8); + BOOST_TEST((info.GetDataType() == DataType::QuantisedAsymm8)); + info.SetQuantizationScale(10.0f); + BOOST_TEST(info.GetQuantizationScale() == 10.0f); + info.SetQuantizationOffset(5); + BOOST_TEST(info.GetQuantizationOffset() == 5); +} + +BOOST_AUTO_TEST_CASE(TensorShapeOperatorBrackets) +{ + TensorShape shape({0,1,2,3}); + // Check version of operator[] which returns an unsigned int + BOOST_TEST(shape[2] == 2); + // Check the version of operator[] which returns a reference + shape[2] = 20; + BOOST_TEST(shape[2] == 20); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/UnitTests.cpp b/src/armnn/test/UnitTests.cpp new file mode 100644 index 0000000000..0e2f99583f --- /dev/null +++ b/src/armnn/test/UnitTests.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#define BOOST_TEST_MODULE UnitTests +#include <boost/test/unit_test.hpp> + +#include "UnitTests.hpp" + +struct ConfigureLoggingFixture +{ + ConfigureLoggingFixture() + { + ConfigureLoggingTest(); + } +}; + +BOOST_GLOBAL_FIXTURE(ConfigureLoggingFixture); + +// On Windows, duplicate the boost test logging output to the Visual Studio output window using OutputDebugString. +#if defined(_MSC_VER) + +#include <boost/iostreams/filtering_stream.hpp> +#include <boost/iostreams/tee.hpp> +#include <iostream> +#include <Windows.h> + +using namespace boost::iostreams; +using namespace std; + +struct DebugOutputSink : boost::iostreams::sink +{ + std::streamsize write(const char* s, std::streamsize n) + { + // The given string is not null-terminated, so we need to copy it. + std::string s2(s, boost::numeric_cast<size_t>(n)); + OutputDebugString(s2.c_str()); + return n; + } +}; + +class SetupDebugOutput +{ +public: + SetupDebugOutput() + { + // Send the output to both cout (as standard) and the debug output. + m_OutputStream.push(tee(std::cout)); + m_OutputStream.push(m_DebugOutputSink); + + boost::unit_test::unit_test_log.set_stream(m_OutputStream); + } +private: + filtering_ostream m_OutputStream; + DebugOutputSink m_DebugOutputSink; +}; + +BOOST_GLOBAL_FIXTURE(SetupDebugOutput); + +#endif // defined(_MSC_VER)
\ No newline at end of file diff --git a/src/armnn/test/UnitTests.hpp b/src/armnn/test/UnitTests.hpp new file mode 100644 index 0000000000..040048ad99 --- /dev/null +++ b/src/armnn/test/UnitTests.hpp @@ -0,0 +1,79 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Logging.hpp" +#include "armnn/Utils.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/test/LayerTests.hpp" +#include <boost/test/unit_test.hpp> + +inline void ConfigureLoggingTest() +{ + // Configure logging for both the ARMNN library and this test program + armnn::ConfigureLogging(true, true, armnn::LogSeverity::Fatal); + armnnUtils::ConfigureLogging(boost::log::core::get().get(), true, true, armnn::LogSeverity::Fatal); +} + +// The following macros require the caller to have defined FactoryType, with one of the following using statements: +// +// using FactoryType = armnn::RefWorkloadFactory; +// using FactoryType = armnn::ClWorkloadFactory; +// using FactoryType = armnn::NeonWorkloadFactory; + +/// Executes BOOST_TEST on CompareTensors() return value so that the predicate_result message is reported. +/// If the test reports itself as not supported then the tensors are not compared. +/// Additionally this checks that the supportedness reported by the test matches the name of the test. +/// Unsupported tests must be 'tagged' by including "UNSUPPORTED" in their name. +/// This is useful because it clarifies that the feature being tested is not actually supported +/// (a passed test with the name of a feature would imply that feature was supported). +/// If support is added for a feature, the test case will fail because the name incorrectly contains UNSUPPORTED. +/// If support is removed for a feature, the test case will fail because the name doesn't contain UNSUPPORTED. +template <typename T, std::size_t n> +void CompareTestResultIfSupported(const std::string& testName, LayerTestResult<T, n> testResult) +{ + bool testNameIndicatesUnsupported = testName.find("UNSUPPORTED") != std::string::npos; + BOOST_CHECK_MESSAGE(testNameIndicatesUnsupported != testResult.supported, + "The test name does not match the supportedness it is reporting"); + if (testResult.supported) + { + BOOST_TEST(CompareTensors(testResult.output, testResult.outputExpected)); + } +} + +template<typename FactoryType, typename TFuncPtr, typename... Args> +void RunTestFunction(const char* testName, TFuncPtr testFunction, Args... args) +{ + FactoryType workloadFactory; + auto testResult = (*testFunction)(workloadFactory, args...); + CompareTestResultIfSupported(testName, testResult); +} + +#define ARMNN_AUTO_TEST_CASE(TestName, TestFunction, ...) \ + BOOST_AUTO_TEST_CASE(TestName) \ + { \ + RunTestFunction<FactoryType>(#TestName, &TestFunction, ##__VA_ARGS__); \ + } + +template<typename FactoryType, typename TFuncPtr, typename... Args> +void CompareRefTestFunction(const char* testName, TFuncPtr testFunction, Args... args) +{ + FactoryType workloadFactory; + armnn::RefWorkloadFactory refWorkloadFactory; + auto testResult = (*testFunction)(workloadFactory, refWorkloadFactory, args...); + CompareTestResultIfSupported(testName, testResult); +} + +#define ARMNN_COMPARE_REF_AUTO_TEST_CASE(TestName, TestFunction, ...) \ + BOOST_AUTO_TEST_CASE(TestName) \ + { \ + CompareRefTestFunction<FactoryType>(#TestName, &TestFunction, ##__VA_ARGS__); \ + } + +#define ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(TestName, Fixture, TestFunction, ...) 
\
+    BOOST_FIXTURE_TEST_CASE(TestName, Fixture) \
+    { \
+        CompareRefTestFunction<FactoryType>(#TestName, &TestFunction, ##__VA_ARGS__); \
+    }
diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp
new file mode 100644
index 0000000000..11fa51626c
--- /dev/null
+++ b/src/armnn/test/UtilsTests.cpp
@@ -0,0 +1,58 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include <armnn/Utils.hpp>
+#include <armnn/Types.hpp>
+#include <armnn/TypesUtils.hpp>
+#include <armnn/Descriptors.hpp>
+
+BOOST_AUTO_TEST_SUITE(Utils)
+
+BOOST_AUTO_TEST_CASE(DataTypeSize)
+{
+    BOOST_TEST(armnn::GetDataTypeSize(armnn::DataType::Float32) == 4);
+    BOOST_TEST(armnn::GetDataTypeSize(armnn::DataType::QuantisedAsymm8) == 1);
+    BOOST_TEST(armnn::GetDataTypeSize(armnn::DataType::Signed32) == 4);
+}
+
+BOOST_AUTO_TEST_CASE(GetDataTypeTest)
+{
+    BOOST_TEST((armnn::GetDataType<float>() == armnn::DataType::Float32));
+    BOOST_TEST((armnn::GetDataType<uint8_t>() == armnn::DataType::QuantisedAsymm8));
+    BOOST_TEST((armnn::GetDataType<int32_t>() == armnn::DataType::Signed32));
+}
+
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithTooManyMappings)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 3u, 4u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings1d)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings2d)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 2u, 0u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings3d)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 3u, 1u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings4d)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 4u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithDuplicatedMappings)
+{
+    BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u, 1u, 0u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
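For readers coming to this commit cold, below is a minimal usage sketch (not part of the change above) showing how a backend test file is expected to consume the UnitTests.hpp macros. The suite name ExampleTests and the functions SimpleExampleTest and CompareExampleTest are placeholder names invented for illustration; the only assumption is that they follow the LayerTestResult-returning signatures that RunTestFunction and CompareRefTestFunction invoke, as the real functions in backends/test/LayerTests.hpp do.

// Hypothetical illustration only - these names are not symbols from this commit.
#include "UnitTests.hpp"

// Placeholder declarations; in the real test executables these come from
// backends/test/LayerTests.hpp. Taking armnn::IWorkloadFactory& is an assumption
// consistent with the macros constructing concrete factories and passing them by reference.
LayerTestResult<float, 4> SimpleExampleTest(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<float, 4> CompareExampleTest(armnn::IWorkloadFactory& workloadFactory,
                                             armnn::IWorkloadFactory& refWorkloadFactory);

// Every translation unit that uses the ARMNN_*_TEST_CASE macros must define FactoryType first.
using FactoryType = armnn::RefWorkloadFactory;

BOOST_AUTO_TEST_SUITE(ExampleTests)

// Expands to BOOST_AUTO_TEST_CASE(SimpleExample): constructs a FactoryType, runs
// SimpleExampleTest against it and passes the LayerTestResult to CompareTestResultIfSupported.
ARMNN_AUTO_TEST_CASE(SimpleExample, SimpleExampleTest)

// Expands to a test that runs CompareExampleTest with both FactoryType and a
// RefWorkloadFactory, so a backend's output can be checked against the reference backend.
ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareExample, CompareExampleTest)

BOOST_AUTO_TEST_SUITE_END()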