From b4540bef0b0327683fe8e63f727c1212800dc2a9 Mon Sep 17 00:00:00 2001
From: David Beck
Date: Mon, 24 Sep 2018 13:18:27 +0100
Subject: IVGCVSW-1898 : Ref backend folder structure

* Reference backend is renamed to backends/reference as per
  https://confluence.arm.com/display/MLENG/Pluggable+backends

Change-Id: I27a13c274eb60995dfb459e3c49c0e2f60bcd32c
---
 src/backends/reference/CMakeLists.txt | 19 +
 src/backends/reference/RefLayerSupport.cpp | 398 +++++++++++++++++++++
 src/backends/reference/RefLayerSupport.hpp | 155 ++++++++
 src/backends/reference/RefWorkloadFactory.cpp | 255 +++++++++++++
 src/backends/reference/RefWorkloadFactory.hpp | 148 ++++++++
 src/backends/reference/backend.cmake | 7 +
 src/backends/reference/backend.mk | 54 +++
 src/backends/reference/test/CMakeLists.txt | 4 +
 src/backends/reference/workloads/Activation.cpp | 91 +++++
 src/backends/reference/workloads/Activation.hpp | 20 ++
 .../reference/workloads/ArithmeticFunction.cpp | 29 ++
 .../reference/workloads/ArithmeticFunction.hpp | 24 ++
 src/backends/reference/workloads/BatchNormImpl.hpp | 56 +++
 src/backends/reference/workloads/Broadcast.cpp | 33 ++
 src/backends/reference/workloads/Broadcast.hpp | 58 +++
 src/backends/reference/workloads/CMakeLists.txt | 102 ++++++
 src/backends/reference/workloads/ConvImpl.cpp | 71 ++++
 src/backends/reference/workloads/ConvImpl.hpp | 187 ++++++++++
 .../reference/workloads/FullyConnected.cpp | 62 ++++
 .../reference/workloads/FullyConnected.hpp | 22 ++
 src/backends/reference/workloads/Merger.hpp | 82 +++++
 src/backends/reference/workloads/Pooling2d.cpp | 241 +++++++++++++
 src/backends/reference/workloads/Pooling2d.hpp | 21 ++
 .../workloads/RefActivationFloat32Workload.cpp | 28 ++
 .../workloads/RefActivationFloat32Workload.hpp | 20 ++
 .../workloads/RefActivationUint8Workload.cpp | 38 ++
 .../workloads/RefActivationUint8Workload.hpp | 21 ++
 .../reference/workloads/RefArithmeticWorkload.cpp | 69 ++++
 .../reference/workloads/RefArithmeticWorkload.hpp | 122 +++++++
 .../workloads/RefBaseConstantWorkload.cpp | 49 +++
 .../workloads/RefBaseConstantWorkload.hpp | 33 ++
 .../RefBatchNormalizationFloat32Workload.cpp | 38 ++
 .../RefBatchNormalizationFloat32Workload.hpp | 28 ++
 .../RefBatchNormalizationUint8Workload.cpp | 47 +++
 .../RefBatchNormalizationUint8Workload.hpp | 28 ++
 .../workloads/RefConstantFloat32Workload.cpp | 19 +
 .../workloads/RefConstantFloat32Workload.hpp | 20 ++
 .../workloads/RefConstantUint8Workload.cpp | 19 +
 .../workloads/RefConstantUint8Workload.hpp | 20 ++
 .../workloads/RefConvertFp16ToFp32Workload.cpp | 25 ++
 .../workloads/RefConvertFp16ToFp32Workload.hpp | 21 ++
 .../workloads/RefConvertFp32ToFp16Workload.cpp | 29 ++
 .../workloads/RefConvertFp32ToFp16Workload.hpp | 21 ++
 .../workloads/RefConvolution2dFloat32Workload.cpp | 37 ++
 .../workloads/RefConvolution2dFloat32Workload.hpp | 27 ++
 .../workloads/RefConvolution2dUint8Workload.cpp | 45 +++
 .../workloads/RefConvolution2dUint8Workload.hpp | 28 ++
 .../RefDepthwiseConvolution2dFloat32Workload.cpp | 37 ++
 .../RefDepthwiseConvolution2dFloat32Workload.hpp | 27 ++
 .../RefDepthwiseConvolution2dUint8Workload.cpp | 46 +++
 .../RefDepthwiseConvolution2dUint8Workload.hpp | 26 ++
 .../RefFakeQuantizationFloat32Workload.cpp | 42 +++
 .../RefFakeQuantizationFloat32Workload.hpp | 21 ++
 .../workloads/RefFloorFloat32Workload.cpp | 29 ++
 .../workloads/RefFloorFloat32Workload.hpp | 21 ++
 .../workloads/RefFullyConnectedFloat32Workload.cpp | 43 +++
 .../workloads/RefFullyConnectedFloat32Workload.hpp | 26 ++
 .../workloads/RefFullyConnectedUint8Workload.cpp | 66
++++ .../workloads/RefFullyConnectedUint8Workload.hpp | 26 ++ .../RefL2NormalizationFloat32Workload.cpp | 61 ++++ .../RefL2NormalizationFloat32Workload.hpp | 21 ++ .../reference/workloads/RefLstmFloat32Workload.cpp | 16 + .../reference/workloads/RefLstmFloat32Workload.hpp | 21 ++ .../workloads/RefMergerFloat32Workload.cpp | 21 ++ .../workloads/RefMergerFloat32Workload.hpp | 21 ++ .../reference/workloads/RefMergerUint8Workload.cpp | 21 ++ .../reference/workloads/RefMergerUint8Workload.hpp | 21 ++ .../workloads/RefNormalizationFloat32Workload.cpp | 185 ++++++++++ .../workloads/RefNormalizationFloat32Workload.hpp | 21 ++ .../reference/workloads/RefPermuteWorkload.cpp | 32 ++ .../reference/workloads/RefPermuteWorkload.hpp | 33 ++ .../workloads/RefPooling2dFloat32Workload.cpp | 33 ++ .../workloads/RefPooling2dFloat32Workload.hpp | 21 ++ .../workloads/RefPooling2dUint8Workload.cpp | 37 ++ .../workloads/RefPooling2dUint8Workload.hpp | 21 ++ .../workloads/RefReshapeFloat32Workload.cpp | 27 ++ .../workloads/RefReshapeFloat32Workload.hpp | 21 ++ .../workloads/RefReshapeUint8Workload.cpp | 27 ++ .../workloads/RefReshapeUint8Workload.hpp | 21 ++ .../workloads/RefResizeBilinearFloat32Workload.cpp | 29 ++ .../workloads/RefResizeBilinearFloat32Workload.hpp | 21 ++ .../workloads/RefResizeBilinearUint8Workload.cpp | 33 ++ .../workloads/RefResizeBilinearUint8Workload.hpp | 21 ++ .../workloads/RefSoftmaxFloat32Workload.cpp | 26 ++ .../workloads/RefSoftmaxFloat32Workload.hpp | 21 ++ .../workloads/RefSoftmaxUint8Workload.cpp | 36 ++ .../workloads/RefSoftmaxUint8Workload.hpp | 21 ++ .../workloads/RefSplitterFloat32Workload.cpp | 21 ++ .../workloads/RefSplitterFloat32Workload.hpp | 21 ++ .../workloads/RefSplitterUint8Workload.cpp | 21 ++ .../workloads/RefSplitterUint8Workload.hpp | 21 ++ .../reference/workloads/RefWorkloadUtils.hpp | 138 +++++++ src/backends/reference/workloads/RefWorkloads.hpp | 53 +++ .../reference/workloads/ResizeBilinear.cpp | 92 +++++ .../reference/workloads/ResizeBilinear.hpp | 15 + src/backends/reference/workloads/Softmax.cpp | 49 +++ src/backends/reference/workloads/Softmax.hpp | 16 + src/backends/reference/workloads/Splitter.hpp | 84 +++++ .../reference/workloads/TensorBufferArrayView.hpp | 42 +++ 99 files changed, 4844 insertions(+) create mode 100644 src/backends/reference/CMakeLists.txt create mode 100644 src/backends/reference/RefLayerSupport.cpp create mode 100644 src/backends/reference/RefLayerSupport.hpp create mode 100644 src/backends/reference/RefWorkloadFactory.cpp create mode 100644 src/backends/reference/RefWorkloadFactory.hpp create mode 100644 src/backends/reference/backend.cmake create mode 100644 src/backends/reference/backend.mk create mode 100644 src/backends/reference/test/CMakeLists.txt create mode 100644 src/backends/reference/workloads/Activation.cpp create mode 100644 src/backends/reference/workloads/Activation.hpp create mode 100644 src/backends/reference/workloads/ArithmeticFunction.cpp create mode 100644 src/backends/reference/workloads/ArithmeticFunction.hpp create mode 100644 src/backends/reference/workloads/BatchNormImpl.hpp create mode 100644 src/backends/reference/workloads/Broadcast.cpp create mode 100644 src/backends/reference/workloads/Broadcast.hpp create mode 100644 src/backends/reference/workloads/CMakeLists.txt create mode 100644 src/backends/reference/workloads/ConvImpl.cpp create mode 100644 src/backends/reference/workloads/ConvImpl.hpp create mode 100644 src/backends/reference/workloads/FullyConnected.cpp create mode 100644 
src/backends/reference/workloads/FullyConnected.hpp create mode 100644 src/backends/reference/workloads/Merger.hpp create mode 100644 src/backends/reference/workloads/Pooling2d.cpp create mode 100644 src/backends/reference/workloads/Pooling2d.hpp create mode 100644 src/backends/reference/workloads/RefActivationFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefActivationFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefActivationUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefActivationUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefArithmeticWorkload.cpp create mode 100644 src/backends/reference/workloads/RefArithmeticWorkload.hpp create mode 100644 src/backends/reference/workloads/RefBaseConstantWorkload.cpp create mode 100644 src/backends/reference/workloads/RefBaseConstantWorkload.hpp create mode 100644 src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefConstantFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefConstantFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefConstantUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefConstantUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp create mode 100644 src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp create mode 100644 src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp create mode 100644 src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp create mode 100644 src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefFloorFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefFloorFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp create mode 
100644 src/backends/reference/workloads/RefLstmFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefLstmFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefMergerFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefMergerFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefMergerUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefMergerUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefPermuteWorkload.cpp create mode 100644 src/backends/reference/workloads/RefPermuteWorkload.hpp create mode 100644 src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefPooling2dUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefPooling2dUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefReshapeFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefReshapeFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefReshapeUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefReshapeUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefSplitterFloat32Workload.cpp create mode 100644 src/backends/reference/workloads/RefSplitterFloat32Workload.hpp create mode 100644 src/backends/reference/workloads/RefSplitterUint8Workload.cpp create mode 100644 src/backends/reference/workloads/RefSplitterUint8Workload.hpp create mode 100644 src/backends/reference/workloads/RefWorkloadUtils.hpp create mode 100644 src/backends/reference/workloads/RefWorkloads.hpp create mode 100644 src/backends/reference/workloads/ResizeBilinear.cpp create mode 100644 src/backends/reference/workloads/ResizeBilinear.hpp create mode 100644 src/backends/reference/workloads/Softmax.cpp create mode 100644 src/backends/reference/workloads/Softmax.hpp create mode 100644 src/backends/reference/workloads/Splitter.hpp create mode 100644 src/backends/reference/workloads/TensorBufferArrayView.hpp (limited to 'src/backends/reference') diff --git a/src/backends/reference/CMakeLists.txt b/src/backends/reference/CMakeLists.txt new file mode 100644 index 0000000000..526baaa1a8 --- /dev/null +++ b/src/backends/reference/CMakeLists.txt @@ -0,0 +1,19 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +list(APPEND armnnRefBackend_sources + RefLayerSupport.cpp + RefLayerSupport.hpp + RefWorkloadFactory.cpp + RefWorkloadFactory.hpp +) + +add_library(armnnRefBackend STATIC ${armnnRefBackend_sources}) +target_include_directories(armnnRefBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnRefBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnRefBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) + +add_subdirectory(workloads test) + diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp new file mode 100644 index 0000000000..d56cdebeda --- /dev/null +++ b/src/backends/reference/RefLayerSupport.cpp @@ -0,0 +1,398 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LayerSupportCommon.hpp" +#include "RefLayerSupport.hpp" +#include +#include +#include + +#include +#include "InternalTypes.hpp" + +using namespace boost; + +namespace armnn +{ + +template +bool IsSupportedForDataTypeRef(std::string* reasonIfUnsupported, + DataType dataType, + Float32Func floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... params) +{ + return IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + &FalseFunc, + floatFuncPtr, + uint8FuncPtr, + std::forward(params)...); +} + +bool IsActivationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsAdditionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsBatchNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConstantSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConvolution2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + ignore_unused(output); + ignore_unused(weights); + ignore_unused(biases); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(weights); + ignore_unused(biases); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + 
&TrueFunc<>, + &TrueFunc<>); +} + +bool IsDivisionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSubtractionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFullyConnectedSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(weights); + ignore_unused(biases); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsInputSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMergerSupportedRef(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsMultiplicationSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsOutputSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPooling2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsResizeBilinearSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool 
IsSoftmaxSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSplitterSupportedRef(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedRef(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsReshapeSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFloorSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(outputStateIn); + ignore_unused(cellStateIn); + ignore_unused(scratchBuffer); + ignore_unused(outputStateOut); + ignore_unused(cellStateOut); + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(inputToForgetWeights); + ignore_unused(inputToCellWeights); + ignore_unused(inputToOutputWeights); + ignore_unused(recurrentToForgetWeights); + ignore_unused(recurrentToCellWeights); + ignore_unused(recurrentToOutputWeights); + ignore_unused(forgetGateBias); + ignore_unused(cellBias); + ignore_unused(outputGateBias); + ignore_unused(inputToInputWeights); + ignore_unused(recurrentToInputWeights); + ignore_unused(cellToInputWeights); + ignore_unused(inputGateBias); + ignore_unused(projectionWeights); + ignore_unused(projectionBias); + ignore_unused(cellToForgetWeights); + ignore_unused(cellToOutputWeights); + return false; +} + +bool IsConvertFp16ToFp32SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return (IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseInputFuncF32<>, + &FalseFuncU8<>) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + 
output.GetDataType(), + &FalseOutputFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>)); +} + +bool IsConvertFp32ToFp16SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return (IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseInputFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &FalseOutputFuncF32<>, + &FalseFuncU8<>)); +} + +bool IsMeanSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp new file mode 100644 index 0000000000..ff2e7e387f --- /dev/null +++ b/src/backends/reference/RefLayerSupport.hpp @@ -0,0 +1,155 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace armnn +{ + +bool IsActivationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const 
TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedRef(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedRef(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedRef(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp32ToFp16SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp new file mode 100644 index 0000000000..5cefd1b6e1 --- /dev/null +++ b/src/backends/reference/RefWorkloadFactory.cpp @@ -0,0 +1,255 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include +#include +#include +#include "RefWorkloadFactory.hpp" +#include "workloads/RefWorkloads.hpp" +#include "Layer.hpp" + +#include + +namespace armnn +{ + +template +std::unique_ptr RefWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, + const WorkloadInfo& info) const +{ + return armnn::MakeWorkload(descriptor, info); +} + +RefWorkloadFactory::RefWorkloadFactory() +{ +} + +bool RefWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::CpuRef, layer, dataType, outReasonIfUnsupported); +} + +std::unique_ptr RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return std::make_unique(tensorInfo); +} + +std::unique_ptr RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + return std::make_unique(tensorInfo); +} + +std::unique_ptr RefWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (info.m_InputTensorInfos.empty() ) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length"); + } + if (info.m_OutputTensorInfos.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length"); + } + + if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: data input and output differ in byte count."); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (info.m_InputTensorInfos.empty() ) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length"); + } + if (info.m_OutputTensorInfos.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length"); + } + if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output differ in byte count."); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) 
const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConvolution2d( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateNormalization( + const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor."); + } + return std::make_unique(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateSubtraction( + const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return 
MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMean( + const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + + +} // namespace armnn diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp new file mode 100644 index 0000000000..1a9227a978 --- /dev/null +++ b/src/backends/reference/RefWorkloadFactory.hpp @@ -0,0 +1,148 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +#include +#include + +namespace armnn +{ + +template +constexpr bool IsOperationQueueDescriptor(const QueueDescriptorType&) { return true; } + +template <> +constexpr bool IsOperationQueueDescriptor(const MemCopyQueueDescriptor&) { return false; } + +template <> +constexpr bool IsOperationQueueDescriptor(const ConstantQueueDescriptor&) { return false; } + +template <> +constexpr bool IsOperationQueueDescriptor(const PermuteQueueDescriptor&) { return false; } + +// Reference workload factory. +class RefWorkloadFactory : public IWorkloadFactory +{ +public: + explicit RefWorkloadFactory(); + virtual ~RefWorkloadFactory() {} + + virtual Compute GetCompute() const override { return Compute::CpuRef; } + + static bool IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return false; } + + virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override + { + boost::ignore_unused(parent, subTensorShape, subTensorOrigin); + return nullptr; + } + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const override; + + virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& 
descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; +private: + + template + std::unique_ptr MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const; + +}; + +} // namespace armnn diff --git a/src/backends/reference/backend.cmake b/src/backends/reference/backend.cmake new file mode 100644 index 0000000000..95e72a438d --- /dev/null +++ b/src/backends/reference/backend.cmake @@ -0,0 +1,7 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/reference) +list(APPEND armnnLibraries armnnRefBackend armnnRefBackendWorkloads) diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk new file mode 100644 index 0000000000..23dab119d0 --- /dev/null +++ b/src/backends/reference/backend.mk @@ -0,0 +1,54 @@ +# +# Copyright © 2017 ARM Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +# BACKEND_SOURCES contains the list of files to be included +# in the Android build and it is picked up by the Android.mk +# file in the root of ArmNN + +BACKEND_SOURCES := \ + RefLayerSupport.cpp \ + RefWorkloadFactory.cpp \ + workloads/Activation.cpp \ + workloads/ArithmeticFunction.cpp \ + workloads/Broadcast.cpp \ + workloads/ConvImpl.cpp \ + workloads/FullyConnected.cpp \ + workloads/Pooling2d.cpp \ + workloads/RefActivationFloat32Workload.cpp \ + workloads/RefActivationUint8Workload.cpp \ + workloads/RefArithmeticWorkload.cpp \ + workloads/RefBaseConstantWorkload.cpp \ + workloads/RefBatchNormalizationFloat32Workload.cpp \ + workloads/RefBatchNormalizationUint8Workload.cpp \ + workloads/RefConstantFloat32Workload.cpp \ + workloads/RefConstantUint8Workload.cpp \ + workloads/RefConvertFp16ToFp32Workload.cpp \ + workloads/RefConvertFp32ToFp16Workload.cpp \ + workloads/RefConvolution2dFloat32Workload.cpp \ + workloads/RefConvolution2dUint8Workload.cpp \ + workloads/RefDepthwiseConvolution2dFloat32Workload.cpp \ + workloads/RefDepthwiseConvolution2dUint8Workload.cpp \ + workloads/RefFakeQuantizationFloat32Workload.cpp \ + workloads/RefFloorFloat32Workload.cpp \ + workloads/RefFullyConnectedFloat32Workload.cpp \ + workloads/RefFullyConnectedUint8Workload.cpp \ + workloads/RefL2NormalizationFloat32Workload.cpp \ + workloads/RefLstmFloat32Workload.cpp \ + workloads/RefMergerFloat32Workload.cpp \ + workloads/RefMergerUint8Workload.cpp \ + workloads/RefNormalizationFloat32Workload.cpp \ + workloads/RefPermuteWorkload.cpp \ + workloads/RefPooling2dFloat32Workload.cpp \ + workloads/RefPooling2dUint8Workload.cpp \ + workloads/RefReshapeFloat32Workload.cpp \ + workloads/RefReshapeUint8Workload.cpp \ + workloads/RefResizeBilinearFloat32Workload.cpp \ + workloads/RefResizeBilinearUint8Workload.cpp \ + workloads/RefSoftmaxFloat32Workload.cpp \ + workloads/RefSoftmaxUint8Workload.cpp \ + workloads/RefSplitterFloat32Workload.cpp \ + workloads/RefSplitterUint8Workload.cpp \ + workloads/ResizeBilinear.cpp \ + workloads/Softmax.cpp diff --git a/src/backends/reference/test/CMakeLists.txt b/src/backends/reference/test/CMakeLists.txt new file mode 100644 index 0000000000..f41a074999 --- /dev/null +++ b/src/backends/reference/test/CMakeLists.txt @@ -0,0 +1,4 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# diff --git a/src/backends/reference/workloads/Activation.cpp b/src/backends/reference/workloads/Activation.cpp new file mode 100644 index 0000000000..ef4903074b --- /dev/null +++ b/src/backends/reference/workloads/Activation.cpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Activation.hpp" + +#include + +#include + +namespace armnn +{ + +void Activation(const float* in, + float* out, + const TensorInfo& tensorInfo, + ActivationFunction function, + float a, + float b) +{ + for (size_t i = 0; i 0.0f ? input : (input * a); + break; + } + case ActivationFunction::Abs: + { + output = input < 0 ? 
-input : input; + break; + } + case ActivationFunction::Sqrt: + { + output = sqrtf(input); + break; + } + case ActivationFunction::Square: + { + output = input * input; + break; + } + case ActivationFunction::TanH: + { + output = a * tanhf(b * input); + break; + } + default: + { + BOOST_LOG_TRIVIAL(error) << "Unsupported activation function"; + return; + } + } + + out[i] = output; + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/Activation.hpp b/src/backends/reference/workloads/Activation.hpp new file mode 100644 index 0000000000..c8a23114f0 --- /dev/null +++ b/src/backends/reference/workloads/Activation.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +namespace armnn +{ + +/// Performs the ActivationFunction elementwise on the inputs to give the outputs. +void Activation(const float* in, + float* out, + const TensorInfo& tensorInfo, + ActivationFunction function, + float a, + float b); + +} //namespace armnn diff --git a/src/backends/reference/workloads/ArithmeticFunction.cpp b/src/backends/reference/workloads/ArithmeticFunction.cpp new file mode 100644 index 0000000000..fede138253 --- /dev/null +++ b/src/backends/reference/workloads/ArithmeticFunction.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ArithmeticFunction.hpp" +#include "Broadcast.hpp" +#include + +namespace armnn +{ + +template +ArithmeticFunction::ArithmeticFunction(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData) +{ + BroadcastLoop(inShape0, inShape1, outShape).Unroll(Functor(), 0, inData0, inData1, outData); +} + +} //namespace armnn + +template struct armnn::ArithmeticFunction>; +template struct armnn::ArithmeticFunction>; +template struct armnn::ArithmeticFunction>; +template struct armnn::ArithmeticFunction>; diff --git a/src/backends/reference/workloads/ArithmeticFunction.hpp b/src/backends/reference/workloads/ArithmeticFunction.hpp new file mode 100644 index 0000000000..eafb6444f6 --- /dev/null +++ b/src/backends/reference/workloads/ArithmeticFunction.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +template +struct ArithmeticFunction +{ + ArithmeticFunction(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData); +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/BatchNormImpl.hpp b/src/backends/reference/workloads/BatchNormImpl.hpp new file mode 100644 index 0000000000..a7579c8373 --- /dev/null +++ b/src/backends/reference/workloads/BatchNormImpl.hpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include + +#include + +namespace armnn +{ + +template +static void BatchNormImpl(NormData data, + const float* varIn, + const float* meanIn, + const float* gammaIn, + const float* betaIn, + float * outputData, + const float * inputData) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++) + { + float var = varIn[c]; + float mean = meanIn[c]; + float gamma = gammaIn[c]; + float beta = betaIn[c]; + + float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps); + float add = beta - mult * mean; + + for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++) + { + for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++) + { + for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++) + { + unsigned int index = i + + j*inputInfo0.GetShape()[3] + + c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + + n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + * inputInfo0.GetShape()[1]; + + outputData[index] = mult * inputData[index] + add; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/Broadcast.cpp b/src/backends/reference/workloads/Broadcast.cpp new file mode 100644 index 0000000000..8421a0a7ed --- /dev/null +++ b/src/backends/reference/workloads/Broadcast.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Broadcast.hpp" + +namespace armnn +{ + +BroadcastLoop::BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape) +: m_DimData(outShape.GetNumDimensions()) +{ + const unsigned int numDims = GetNumDimensions(); + + unsigned int sIn0 = 1; + unsigned int sIn1 = 1; + unsigned int sOut = 1; + + for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--) + { + m_DimData[j].m_DimSize = outShape[j]; + m_DimData[j].m_Stride1 = (inShape0[j] > 1) ? sIn0 : 0; + m_DimData[j].m_Stride2 = (inShape1[j] > 1) ? sIn1 : 0; + m_DimData[j].m_StrideOut = sOut; + + sIn0 *= inShape0[j]; + sIn1 *= inShape1[j]; + sOut *= outShape[j]; + } +} + +} // namespace armnn diff --git a/src/backends/reference/workloads/Broadcast.hpp b/src/backends/reference/workloads/Broadcast.hpp new file mode 100644 index 0000000000..e92ed0598d --- /dev/null +++ b/src/backends/reference/workloads/Broadcast.hpp @@ -0,0 +1,58 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +namespace armnn +{ + +struct BroadcastLoop +{ + BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape); + + unsigned int GetNumDimensions() + { + return static_cast(m_DimData.size()); + } + + template + void Unroll(Func operationFunc, + unsigned int dimension, + const T0* inData0, + const T1* inData1, + U* outData) + { + if (dimension >= GetNumDimensions()) + { + *outData = operationFunc(*inData0, *inData1); + return; + } + + for (unsigned int i = 0; i < m_DimData[dimension].m_DimSize; i++) + { + Unroll(operationFunc, dimension + 1, inData0, inData1, outData); + + inData0 += m_DimData[dimension].m_Stride1; + inData1 += m_DimData[dimension].m_Stride2; + outData += m_DimData[dimension].m_StrideOut; + } + } + +private: + // Struct to hold the dimension data. 
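    // Editor's illustration (not part of the patch): the struct below records, per output
    // dimension, the stride by which each input pointer advances. Broadcast.cpp above sets
    // m_Stride1/m_Stride2 to 0 wherever the corresponding input dimension has size 1, and
    // that zero stride is what implements broadcasting. For example, an element-wise add of
    // a {2,3} tensor and a {1,3} tensor into a {2,3} output,
    //     BroadcastLoop({2,3}, {1,3}, {2,3}).Unroll(std::plus<float>(), 0, in0, in1, out);
    // re-reads the same three values of the second input for both rows of the first
    // (in0, in1 and out being hypothetical float pointers).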
+ struct BroadcastDimensionData + { + unsigned int m_DimSize; + unsigned int m_StrideOut; + unsigned int m_Stride1; + unsigned int m_Stride2; + }; + + std::vector m_DimData; +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt new file mode 100644 index 0000000000..7343b70daf --- /dev/null +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -0,0 +1,102 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnRefBackendWorkloads_sources + Activation.cpp + Activation.hpp + ArithmeticFunction.cpp + ArithmeticFunction.hpp + BatchNormImpl.hpp + Broadcast.cpp + Broadcast.hpp + ConvImpl.cpp + ConvImpl.hpp + FullyConnected.cpp + FullyConnected.hpp + Merger.hpp + Pooling2d.cpp + Pooling2d.hpp + RefActivationFloat32Workload.cpp + RefActivationFloat32Workload.hpp + RefActivationUint8Workload.cpp + RefActivationUint8Workload.hpp + RefArithmeticWorkload.cpp + RefArithmeticWorkload.hpp + RefBaseConstantWorkload.cpp + RefBaseConstantWorkload.hpp + RefBatchNormalizationFloat32Workload.cpp + RefBatchNormalizationFloat32Workload.hpp + RefBatchNormalizationUint8Workload.cpp + RefBatchNormalizationUint8Workload.hpp + RefConstantFloat32Workload.cpp + RefConstantFloat32Workload.hpp + RefConstantUint8Workload.cpp + RefConstantUint8Workload.hpp + RefConvertFp16ToFp32Workload.cpp + RefConvertFp16ToFp32Workload.hpp + RefConvertFp32ToFp16Workload.cpp + RefConvertFp32ToFp16Workload.hpp + RefConvolution2dFloat32Workload.cpp + RefConvolution2dFloat32Workload.hpp + RefConvolution2dUint8Workload.cpp + RefConvolution2dUint8Workload.hpp + RefDepthwiseConvolution2dFloat32Workload.cpp + RefDepthwiseConvolution2dFloat32Workload.hpp + RefDepthwiseConvolution2dUint8Workload.cpp + RefDepthwiseConvolution2dUint8Workload.hpp + RefFakeQuantizationFloat32Workload.cpp + RefFakeQuantizationFloat32Workload.hpp + RefFloorFloat32Workload.cpp + RefFloorFloat32Workload.hpp + RefFullyConnectedFloat32Workload.cpp + RefFullyConnectedFloat32Workload.hpp + RefFullyConnectedUint8Workload.cpp + RefFullyConnectedUint8Workload.hpp + RefL2NormalizationFloat32Workload.cpp + RefL2NormalizationFloat32Workload.hpp + RefLstmFloat32Workload.cpp + RefLstmFloat32Workload.hpp + RefMergerFloat32Workload.cpp + RefMergerFloat32Workload.hpp + RefMergerUint8Workload.cpp + RefMergerUint8Workload.hpp + RefNormalizationFloat32Workload.cpp + RefNormalizationFloat32Workload.hpp + RefPermuteWorkload.cpp + RefPermuteWorkload.hpp + RefPooling2dFloat32Workload.cpp + RefPooling2dFloat32Workload.hpp + RefPooling2dUint8Workload.cpp + RefPooling2dUint8Workload.hpp + RefReshapeFloat32Workload.cpp + RefReshapeFloat32Workload.hpp + RefReshapeUint8Workload.cpp + RefReshapeUint8Workload.hpp + RefResizeBilinearFloat32Workload.cpp + RefResizeBilinearFloat32Workload.hpp + RefResizeBilinearUint8Workload.cpp + RefResizeBilinearUint8Workload.hpp + RefSoftmaxFloat32Workload.cpp + RefSoftmaxFloat32Workload.hpp + RefSoftmaxUint8Workload.cpp + RefSoftmaxUint8Workload.hpp + RefSplitterFloat32Workload.cpp + RefSplitterFloat32Workload.hpp + RefSplitterUint8Workload.cpp + RefSplitterUint8Workload.hpp + RefWorkloads.hpp + RefWorkloadUtils.hpp + ResizeBilinear.cpp + ResizeBilinear.hpp + Softmax.cpp + Softmax.hpp + Splitter.hpp + TensorBufferArrayView.hpp +) + +add_library(armnnRefBackendWorkloads STATIC ${armnnRefBackendWorkloads_sources}) +target_include_directories(armnnRefBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src) 
+target_include_directories(armnnRefBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnRefBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/reference/workloads/ConvImpl.cpp b/src/backends/reference/workloads/ConvImpl.cpp new file mode 100644 index 0000000000..8743a2bd0d --- /dev/null +++ b/src/backends/reference/workloads/ConvImpl.cpp @@ -0,0 +1,71 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ConvImpl.hpp" + +#include + +#include +#include + +namespace armnn +{ + +QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier) +{ + BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f); + if (multiplier == 0.0f) + { + m_Multiplier = 0; + m_RightShift = 0; + } + else + { + const double q = std::frexp(multiplier, &m_RightShift); + m_RightShift = -m_RightShift; + int64_t qFixed = static_cast(std::round(q * (1ll << 31))); + BOOST_ASSERT(qFixed <= (1ll << 31)); + if (qFixed == (1ll << 31)) + { + qFixed /= 2; + --m_RightShift; + } + BOOST_ASSERT(m_RightShift >= 0); + BOOST_ASSERT(qFixed <= std::numeric_limits::max()); + m_Multiplier = static_cast(qFixed); + } +} + +int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const +{ + int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier); + return RoundingDivideByPOT(x, m_RightShift); +} + +int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) +{ + // Check for overflow. + if (a == b && a == std::numeric_limits::min()) + { + return std::numeric_limits::max(); + } + int64_t a_64(a); + int64_t b_64(b); + int64_t ab_64 = a_64 * b_64; + int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + int32_t ab_x2_high32 = static_cast((ab_64 + nudge) / (1ll << 31)); + return ab_x2_high32; +} + +int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent) +{ + BOOST_ASSERT(exponent >= 0 && exponent <= 31); + int32_t mask = (1 << exponent) - 1; + int32_t remainder = x & mask; + int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0); + return (x >> exponent) + (remainder > threshold ? 1 : 0); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp new file mode 100644 index 0000000000..4c9ab2a644 --- /dev/null +++ b/src/backends/reference/workloads/ConvImpl.hpp @@ -0,0 +1,187 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include + +#include +#include + +#include +#include + +namespace armnn +{ + +/// Performs multiplication of an integer with a multiplier which is less than one, +/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. +struct QuantizedMultiplierSmallerThanOne +{ +public: + /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier. + /// This stores the appropriate integer quantities (derived from the given multiplier) for later use. + /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne(). + QuantizedMultiplierSmallerThanOne(float multiplier); + + /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne(). 
+ int32_t operator*(int32_t rhs) const; + +private: + /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul(). + static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b); + + /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT(). + static int32_t RoundingDivideByPOT(int32_t x, int exponent); + + int32_t m_Multiplier; + int32_t m_RightShift; +}; + +/// An implementation shared by normal and depthwise convolution. +template +static void ConvImpl(ConvData data, + const InputType* inputData, + float inputScale, + int32_t inputOffset, + const InputType* filterData, + float filterScale, + int32_t filterOffset, + const BiasType* biasData, + InputType* outputData, + float outputScale, + int32_t outputOffset, + const TensorInfo& filterInfo, + bool depthwise = false) +{ + if (data.m_Parameters.m_BiasEnabled && !biasData) + { + throw InvalidArgumentException("Bias is enabled but the bias data is invalid"); + } + + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + + unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; + unsigned int channelsInput = filterInfo.GetShape()[1]; + unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0]; + + unsigned int batchSize = outputInfo0.GetShape()[0]; + unsigned int heightOutput = outputInfo0.GetShape()[2]; + unsigned int widthOutput = outputInfo0.GetShape()[3]; + unsigned int heightInput = inputInfo0.GetShape()[2]; + unsigned int widthInput = inputInfo0.GetShape()[3]; + + unsigned int heightFilter = filterInfo.GetShape()[2]; + unsigned int widthFilter = filterInfo.GetShape()[3]; + + unsigned int paddingTop = data.m_Parameters.m_PadTop; + unsigned int paddingLeft = data.m_Parameters.m_PadLeft; + unsigned int hStride = data.m_Parameters.m_StrideY; + unsigned int xStride = data.m_Parameters.m_StrideX; + + // The world's least efficient convolution. + for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) + { + for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) + { + for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++) + { + for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) + { + // This loop goes over each output element. + AccumulatorType sum = AccumulatorType(); + + // For depthwise, each output channel corresponds to exactly one input channel. + // For normal, must loop over each input channel. + for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++) + { + unsigned int depthwiseMultiplierIdx = 0; + if (depthwise) + { + cInput = cOutput / depthMult; + depthwiseMultiplierIdx = cOutput % depthMult; + } + + for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++) + { + for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) + { + // This loop goes over each input element for each output element. + + unsigned int filterIndex; + + // Since dimensionality of kernel depends on depthwiseness, so does index. 
+ if (depthwise) + { + filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput + + cInput * widthFilter * heightFilter + + yFilter * widthFilter + + xFilter; + } + else + { + filterIndex = cOutput * widthFilter * heightFilter * channelsInput + + cInput * widthFilter * heightFilter + + yFilter * widthFilter + + xFilter; + } + AccumulatorType filterValue = filterData[filterIndex] - + boost::numeric_cast(filterOffset); + + unsigned int yInput = yOutput * hStride + yFilter; + unsigned int xInput = xOutput * xStride + xFilter; + + AccumulatorType inputValue; + + // Check if we're in the padding. + if (yInput < paddingTop || yInput >= heightInput + paddingTop || + xInput < paddingLeft || xInput >= widthInput + paddingLeft ) + { + inputValue = AccumulatorType(); + } + else + { + inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput + + widthInput * heightInput * cInput + + widthInput * (yInput - paddingTop) + + xInput - paddingLeft] - + boost::numeric_cast(inputOffset); + } + sum += filterValue * inputValue; + } + } + } + + if (data.m_Parameters.m_BiasEnabled) + { + sum += biasData[cOutput]; + } + + if (outputScale != 0.0f) + { + float multiplier = (inputScale * filterScale) / outputScale; + // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent + // with the AndroidNN CPU implementation. This should be (roughly) equivalent to: + // sum = std::round(multiplier * sum + outputOffset); + sum = boost::numeric_cast( + QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast(sum)) + + boost::numeric_cast(outputOffset); + sum = std::min(std::max(sum, 0), 255); + } + + outputData[batchIdx * widthOutput * heightOutput * channelsOutput + + widthOutput * heightOutput * cOutput + + widthOutput * yOutput + + xOutput] = boost::numeric_cast(sum); + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/FullyConnected.cpp b/src/backends/reference/workloads/FullyConnected.cpp new file mode 100644 index 0000000000..bf5814d2ad --- /dev/null +++ b/src/backends/reference/workloads/FullyConnected.cpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "FullyConnected.hpp" + +#include + +namespace armnn +{ + +void FullyConnected(const float* inputData, + float* outputData, + const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, + const float* weightData, + const float* biasData, + bool transposeWeights) +{ + unsigned int N = outputTensorInfo.GetShape()[1]; // Outputs Vector Size. + + BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Needs some data. + + unsigned int K = 1; // Total number of activations in the input. 
+ for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++) + { + K *= inputTensorInfo.GetShape()[i]; + } + + for (unsigned int n = 0; n < inputTensorInfo.GetShape()[0]; n++) + { + for (unsigned int channelOutput = 0; channelOutput < N; channelOutput++) + { + float outval = 0.f; + + for (unsigned int channelInput = 0; channelInput < K; channelInput++) + { + float weight; + if (transposeWeights) + { + weight = weightData[channelOutput * K + channelInput]; + } + else + { + weight = weightData[channelInput * N + channelOutput]; + } + + outval += weight * inputData[n * K + channelInput]; + } + + if (biasData) + { + outval += biasData[channelOutput]; + } + + outputData[n * N + channelOutput] = outval; + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/FullyConnected.hpp b/src/backends/reference/workloads/FullyConnected.hpp new file mode 100644 index 0000000000..623259f8f8 --- /dev/null +++ b/src/backends/reference/workloads/FullyConnected.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +/// Performs a matrix multiplication and optionally adds a bias. +void FullyConnected(const float* inputData, + float* outputData, + const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, + const float* weightData, + const float* biasData, + bool transposeWeights); + +} //namespace armnn diff --git a/src/backends/reference/workloads/Merger.hpp b/src/backends/reference/workloads/Merger.hpp new file mode 100644 index 0000000000..867925faa2 --- /dev/null +++ b/src/backends/reference/workloads/Merger.hpp @@ -0,0 +1,82 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include "backends/WorkloadData.hpp" + +#include + +namespace armnn +{ + +template +void Merger(const MergerQueueDescriptor& data) +{ + const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + + for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index) + { + unsigned int indices[MaxNumOfTensorDimensions] = { 0 }; + + unsigned int indexRemainder = index; + unsigned int dimensionStride = outputInfo0.GetNumElements(); + + for (unsigned int i=0; i= view.m_Origin[i] + inputInfo.GetShape()[i]) + { + insideView = false; + } + } + + if (insideView) + { + unsigned int inIndex = 0; + unsigned int dimensionStride = 1; + + for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;) + { + inIndex += dimensionStride * (indices[i] - view.m_Origin[i]); + dimensionStride *= inputInfo.GetShape()[i]; + } + + //We are within the view, copy input data to the output corresponding to this view. + (GetOutputTensorData(0, data))[index] = + (GetInputTensorData(viewIdx, data))[inIndex]; + + //What should we do if input views overlap on the output tensor? + //We could error, take the average, or shm else... + //For now just stop after finding first view (input) that matches. + break; + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/Pooling2d.cpp b/src/backends/reference/workloads/Pooling2d.cpp new file mode 100644 index 0000000000..5812a290e7 --- /dev/null +++ b/src/backends/reference/workloads/Pooling2d.cpp @@ -0,0 +1,241 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
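For reference only (not part of the patch): FullyConnected above is a per-batch matrix-vector product with an optional bias, reading the weight matrix as [N, K] when transposeWeights is set and as [K, N] otherwise. A standalone sketch of the same computation; the function name and signature here are hypothetical.

#include <cstddef>
#include <vector>

// out[n] = W * in[n] + bias, with W stored as [N,K] when transposed, [K,N] otherwise.
std::vector<float> FullyConnectedRef(const std::vector<float>& input, std::size_t batch, std::size_t K,
                                     const std::vector<float>& weights, std::size_t N,
                                     const float* bias, bool transposeWeights)
{
    std::vector<float> output(batch * N, 0.0f);
    for (std::size_t n = 0; n < batch; ++n)
    {
        for (std::size_t o = 0; o < N; ++o)
        {
            float acc = bias ? bias[o] : 0.0f;
            for (std::size_t k = 0; k < K; ++k)
            {
                const float w = transposeWeights ? weights[o * K + k] : weights[k * N + o];
                acc += w * input[n * K + k];
            }
            output[n * N + o] = acc;
        }
    }
    return output;
}

When transposeWeights is set, the inner loop reads each output neuron's weights contiguously; the untransposed layout strides by N instead, which is why the reference code distinguishes the two cases.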
+// SPDX-License-Identifier: MIT +// + +#include "Pooling2d.hpp" + +#include +#include + +#include + +#include +#include +#include + +namespace +{ + using PoolingAlgorithm = armnn::PoolingAlgorithm; + + float DefaultInitializer(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return std::numeric_limits::lowest(); + } + case PoolingAlgorithm::Average: + case PoolingAlgorithm::L2: + { + return 0.0f; + } + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + using Accumulator = std::function; + + Accumulator GetAccumulator(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return [](float & accu, float value) { + if (value > accu) { + accu = value; + } + }; + } + + case PoolingAlgorithm::Average: + { + return [](float & accu, float value) { + accu += value; + }; + } + + case PoolingAlgorithm::L2: + { + return [](float & accu, float value) { + accu += (value*value); + }; + } + + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + using Executor = std::function; + + Executor GetExecutor(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return [](float & accumulated, float kernelSize) {}; + } + + case PoolingAlgorithm::Average: + { + return [](float & accumulated, float kernelSize) { + accumulated /= kernelSize; + }; + } + + case PoolingAlgorithm::L2: + { + return [](float & accumulated, float kernelSize) { + accumulated = sqrtf(accumulated / kernelSize); + }; + } + + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + bool OnPaddingOnly(int start, int end, int maxRange, int padding) + { + if (end <= 0 || start > (maxRange - padding)) + { + return true; + } + else + { + return false; + } + } + + + bool ClampRange(int & start, int & end, int maxRange) + { + if (start < 0 || end > maxRange) + { + start = std::min(std::max(start, 0), maxRange); + end = std::min(std::max(end, 0), maxRange); + return true; + } + else + { + return false; + } + } +} + +namespace armnn +{ + +void Pooling2d(const float* in, + float* out, + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const Pooling2dDescriptor& params) +{ + const int batchSize = boost::numeric_cast(outputInfo.GetShape()[0]); + const int channels = boost::numeric_cast(outputInfo.GetShape()[1]); + const int heightOutput = boost::numeric_cast(outputInfo.GetShape()[2]); + const int widthOutput = boost::numeric_cast(outputInfo.GetShape()[3]); + const int heightInput = boost::numeric_cast(inputInfo.GetShape()[2]); + const int widthInput = boost::numeric_cast(inputInfo.GetShape()[3]); + const int padLeft = boost::numeric_cast(params.m_PadLeft); + const int padRight = boost::numeric_cast(params.m_PadRight); + const int padTop = boost::numeric_cast(params.m_PadTop); + const int padBottom = boost::numeric_cast(params.m_PadBottom); + const int strideX = boost::numeric_cast(params.m_StrideX); + const int strideY = boost::numeric_cast(params.m_StrideY); + const int poolHeight = boost::numeric_cast(params.m_PoolHeight); + const int poolWidth = boost::numeric_cast(params.m_PoolWidth); + + float defaultInitializer = DefaultInitializer(params.m_PoolType); + + Accumulator accumulate = GetAccumulator(params.m_PoolType); + Executor execute = GetExecutor(params.m_PoolType); + + // Check supported padding methods outside the loop to simplify + // the inner loop. 
+ if (params.m_PaddingMethod != PaddingMethod::Exclude && + params.m_PaddingMethod != PaddingMethod::IgnoreValue) + { + throw armnn::InvalidArgumentException("Unsupported padding type"); + } + + for (int n = 0; n < batchSize; n++) + { + for (int c = 0; c < channels; c++) + { + for (int yOutput = 0; yOutput < heightOutput; yOutput++) + { + for (int xOutput = 0; xOutput < widthOutput; xOutput++) + { + int hstart = (yOutput * strideY) - padTop; + int wstart = (xOutput * strideX) - padLeft; + int hend = hstart + poolHeight; + int wend = wstart + poolWidth; + + // Clamp the pooling region inside the valid input area (which includes the padding). + // This is necessary because the final pooling in a row may overlap beyond the padding. + hend = std::min(hend, heightInput + padBottom); + wend = std::min(wend, widthInput + padRight); + + float result = defaultInitializer; + float poolAreaSize = boost::numeric_cast((hend - hstart) * (wend - wstart)); + + // Special case: when the pooling kernel is over a padding region and the padding + // size is larger or equal to the kernel and the kernel only covers + // padding and no real values, then we initialize the result as zero + // by convention. This is because we need to choose a value here and + // all values we have are padding, which we ignore. + if (OnPaddingOnly(hstart, hend, heightInput, padBottom) || + OnPaddingOnly(wstart, wend, widthInput, padRight)) + { + result = 0.0f; + } + + bool clamped = ClampRange(wstart, wend, widthInput); + clamped |= ClampRange(hstart, hend, heightInput); + + if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude) + { + // When we exclude the padding, it means we calculate with a smaller + // kernel size, so I changed the divisor here. + poolAreaSize = boost::numeric_cast((hend - hstart) * (wend - wstart)); + } + + for (auto yInput = hstart; yInput < hend; yInput++) + { + for (auto xInput = wstart; xInput < wend; xInput++) + { + float inval = in[n * widthInput * heightInput * channels + + c * widthInput * heightInput + + yInput * widthInput + + xInput]; + + accumulate(result, inval); + } + } + + execute(result, poolAreaSize); + + out[n * widthOutput * heightOutput * channels + + c * widthOutput * heightOutput + + yOutput * widthOutput + + xOutput] = result; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/Pooling2d.hpp b/src/backends/reference/workloads/Pooling2d.hpp new file mode 100644 index 0000000000..da56b25c4e --- /dev/null +++ b/src/backends/reference/workloads/Pooling2d.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +/// Computes the Pooling2d operation. +void Pooling2d(const float* in, + float* out, + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const Pooling2dDescriptor& params); + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefActivationFloat32Workload.cpp b/src/backends/reference/workloads/RefActivationFloat32Workload.cpp new file mode 100644 index 0000000000..3cc59be7a4 --- /dev/null +++ b/src/backends/reference/workloads/RefActivationFloat32Workload.cpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
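A quick standalone check, separate from the patch: the padding handling in Pooling2d.cpp comes down to the divisor used when averaging. With PaddingMethod::IgnoreValue the divisor stays at the window size (padding elements contribute zeros), while with PaddingMethod::Exclude the window is clamped to the valid input region and the divisor shrinks to match.

#include <algorithm>
#include <cstdio>

int main()
{
    // Average pooling, 3x3 kernel, 4x4 input, 1 pixel of padding on every side.
    const int poolSize = 3, inputDim = 4, pad = 1;

    // Window anchored at output position (0,0): it starts in the padding.
    int start = 0 * 1 - pad;        // -1
    int end   = start + poolSize;   //  2

    const int ignoreValueArea = poolSize * poolSize;      // 9: padding counted as zeros
    const int clampedStart = std::max(start, 0);
    const int clampedEnd   = std::min(end, inputDim);
    const int excludeArea  = (clampedEnd - clampedStart)  // 2 valid columns
                           * (clampedEnd - clampedStart); // times 2 valid rows = 4

    std::printf("IgnoreValue divisor: %d, Exclude divisor: %d\n", ignoreValueArea, excludeArea);
}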
+// SPDX-License-Identifier: MIT +// + +#include "RefActivationFloat32Workload.hpp" + +#include "Activation.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefActivationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationFloat32Workload_Execute"); + + Activation(GetInputTensorDataFloat(0, m_Data), + GetOutputTensorDataFloat(0, m_Data), + GetTensorInfo(m_Data.m_Inputs[0]), + m_Data.m_Parameters.m_Function, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefActivationFloat32Workload.hpp b/src/backends/reference/workloads/RefActivationFloat32Workload.hpp new file mode 100644 index 0000000000..0de33f02ff --- /dev/null +++ b/src/backends/reference/workloads/RefActivationFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +namespace armnn +{ + +class RefActivationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefActivationUint8Workload.cpp b/src/backends/reference/workloads/RefActivationUint8Workload.cpp new file mode 100644 index 0000000000..b95c2e22a8 --- /dev/null +++ b/src/backends/reference/workloads/RefActivationUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefActivationUint8Workload.hpp" + +#include "Activation.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationUint8Workload_Execute"); + + const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); + + std::vector results(tensorInfo.GetNumElements()); + + Activation(dequant.data(), + results.data(), + tensorInfo, + m_Data.m_Parameters.m_Function, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefActivationUint8Workload.hpp b/src/backends/reference/workloads/RefActivationUint8Workload.hpp new file mode 100644 index 0000000000..f38888a9f7 --- /dev/null +++ b/src/backends/reference/workloads/RefActivationUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefActivationUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.cpp b/src/backends/reference/workloads/RefArithmeticWorkload.cpp new file mode 100644 index 0000000000..6c39fa1186 --- /dev/null +++ b/src/backends/reference/workloads/RefArithmeticWorkload.cpp @@ -0,0 +1,69 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
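For illustration (not part of the patch): the Uint8 workloads in this change share one pattern: dequantize the 8-bit input using its scale and zero point, run the float reference kernel, then requantize into the output's quantization parameters. A standalone sketch of that affine round trip; the helpers below are simplified stand-ins, not armnn's Dequantize/Quantize.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Affine quantization helpers, illustrative stand-ins for the armnn utilities.
float DequantizeValue(uint8_t q, float scale, int32_t offset)
{
    return scale * static_cast<float>(static_cast<int32_t>(q) - offset);
}

uint8_t QuantizeValue(float value, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::round(value / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
    const float scale = 0.05f;
    const int32_t offset = 128;   // zero point

    const uint8_t in = 200;
    const float x = DequantizeValue(in, scale, offset);    // 3.6
    const float y = std::max(0.0f, x);                     // a float kernel, e.g. ReLU
    const uint8_t out = QuantizeValue(y, scale, offset);   // back to 200

    std::printf("in=%u dequantized=%.2f requantized=%u\n",
                static_cast<unsigned>(in), x, static_cast<unsigned>(out));
}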
+// SPDX-License-Identifier: MIT +// + +#include "RefArithmeticWorkload.hpp" +#include "ArithmeticFunction.hpp" +#include "RefWorkloadUtils.hpp" +#include "Profiling.hpp" +#include + +namespace armnn +{ + +template +void BaseFloat32ArithmeticWorkload::ExecuteImpl(const char * debugString) const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString); + + auto data = Float32Workload::GetData(); + const TensorShape& inShape0 = GetTensorInfo(data.m_Inputs[0]).GetShape(); + const TensorShape& inShape1 = GetTensorInfo(data.m_Inputs[1]).GetShape(); + const TensorShape& outShape = GetTensorInfo(data.m_Outputs[0]).GetShape(); + + const float* inData0 = GetInputTensorDataFloat(0, data); + const float* inData1 = GetInputTensorDataFloat(1, data); + float* outData = GetOutputTensorDataFloat(0, data); + + ArithmeticFunction(inShape0, inShape1, outShape, inData0, inData1, outData); +} + +template +void BaseUint8ArithmeticWorkload::ExecuteImpl(const char * debugString) const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString); + + auto data = Uint8Workload::GetData(); + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(data.m_Inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); + + auto dequant0 = Dequantize(GetInputTensorDataU8(0, data), inputInfo0); + auto dequant1 = Dequantize(GetInputTensorDataU8(1, data), inputInfo1); + + std::vector results(outputInfo.GetNumElements()); + + ArithmeticFunction(inputInfo0.GetShape(), + inputInfo1.GetShape(), + outputInfo.GetShape(), + dequant0.data(), + dequant1.data(), + results.data()); + + Quantize(GetOutputTensorDataU8(0, data), results.data(), outputInfo); +} + +} + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.hpp b/src/backends/reference/workloads/RefArithmeticWorkload.hpp new file mode 100644 index 0000000000..7197b7a883 --- /dev/null +++ b/src/backends/reference/workloads/RefArithmeticWorkload.hpp @@ -0,0 +1,122 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include "backends/StringMapping.hpp" +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +template +class RefArithmeticWorkload +{ + // Needs specialization. The default is empty on purpose. 
+}; + +template +class BaseFloat32ArithmeticWorkload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + void ExecuteImpl(const char * debugString) const; +}; + +template +class RefArithmeticWorkload + : public BaseFloat32ArithmeticWorkload +{ +public: + using BaseFloat32ArithmeticWorkload::BaseFloat32ArithmeticWorkload; + + virtual void Execute() const override + { + using Parent = BaseFloat32ArithmeticWorkload; + Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString)); + } +}; + +template +class BaseUint8ArithmeticWorkload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + void ExecuteImpl(const char * debugString) const; +}; + +template +class RefArithmeticWorkload + : public BaseUint8ArithmeticWorkload +{ +public: + using BaseUint8ArithmeticWorkload::BaseUint8ArithmeticWorkload; + + virtual void Execute() const override + { + using Parent = BaseUint8ArithmeticWorkload; + Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString)); + } +}; + +using RefAdditionFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + AdditionQueueDescriptor, + StringMapping::RefAdditionWorkload_Execute>; + +using RefAdditionUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + AdditionQueueDescriptor, + StringMapping::RefAdditionWorkload_Execute>; + + +using RefSubtractionFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + SubtractionQueueDescriptor, + StringMapping::RefSubtractionWorkload_Execute>; + +using RefSubtractionUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + SubtractionQueueDescriptor, + StringMapping::RefSubtractionWorkload_Execute>; + +using RefMultiplicationFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + MultiplicationQueueDescriptor, + StringMapping::RefMultiplicationWorkload_Execute>; + +using RefMultiplicationUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + MultiplicationQueueDescriptor, + StringMapping::RefMultiplicationWorkload_Execute>; + +using RefDivisionFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + DivisionQueueDescriptor, + StringMapping::RefDivisionWorkload_Execute>; + +using RefDivisionUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + DivisionQueueDescriptor, + StringMapping::RefDivisionWorkload_Execute>; + +} // armnn diff --git a/src/backends/reference/workloads/RefBaseConstantWorkload.cpp b/src/backends/reference/workloads/RefBaseConstantWorkload.cpp new file mode 100644 index 0000000000..647677b4fb --- /dev/null +++ b/src/backends/reference/workloads/RefBaseConstantWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefBaseConstantWorkload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include + +#include + +#include + +namespace armnn +{ + +template +void RefBaseConstantWorkload::Execute() const +{ + // Considering the reference backend independently, it could be possible to initialise the intermediate tensor + // created by the layer output handler at workload construction time, rather than at workload execution time. + // However, this is not an option for other backends (e.g. CL). For consistency, we prefer to align all + // implementations. 
+ // A similar argument can be made about performing the memory copy in the first place (the layer output handler + // could have a non-owning reference to the layer output tensor managed by the const input layer); again, this is + // not an option for other backends, and the extra complexity required to make this work for the reference backend + // may not be worth the effort (skipping a memory copy in the first inference). + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); + BOOST_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes()); + + memcpy(GetOutputTensorData(0, data), data.m_LayerOutput->GetConstTensor(), + outputInfo.GetNumBytes()); + + m_RanOnce = true; + } +} + +template class RefBaseConstantWorkload; +template class RefBaseConstantWorkload; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefBaseConstantWorkload.hpp b/src/backends/reference/workloads/RefBaseConstantWorkload.hpp new file mode 100644 index 0000000000..8dc9fd6104 --- /dev/null +++ b/src/backends/reference/workloads/RefBaseConstantWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types. +template +class RefBaseConstantWorkload : public TypedWorkload +{ +public: + RefBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..313af9c438 --- /dev/null +++ b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
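As an aside (not part of the patch): RefBaseConstantWorkload defers the copy of the constant tensor to the first Execute() call and records that it has run through a mutable flag, since Execute() is const. A stripped-down sketch of that idiom; the class and names here are hypothetical.

#include <cstring>
#include <utility>
#include <vector>

// Copies its constant payload into the output buffer only on the first Execute(),
// mirroring the m_RanOnce flag used above.
class ConstantProducer
{
public:
    explicit ConstantProducer(std::vector<float> payload) : m_Payload(std::move(payload)) {}

    void Execute(float* output) const
    {
        if (!m_RanOnce)
        {
            std::memcpy(output, m_Payload.data(), m_Payload.size() * sizeof(float));
            m_RanOnce = true;
        }
    }

private:
    std::vector<float> m_Payload;
    mutable bool m_RanOnce = false;   // mutable because Execute() is const
};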
+// SPDX-License-Identifier: MIT +// + +#include "RefBatchNormalizationFloat32Workload.hpp" + +#include "BatchNormImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Mean(std::make_unique(*(descriptor.m_Mean))), + m_Variance(std::make_unique(*(descriptor.m_Variance))), + m_Beta(std::make_unique(*(descriptor.m_Beta))), + m_Gamma(std::make_unique(*(descriptor.m_Gamma))) {} + +void RefBatchNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute"); + + const float* var = m_Variance->GetConstTensor(); + const float* mean = m_Mean->GetConstTensor(); + const float* gamma = m_Gamma->GetConstTensor(); + const float* beta = m_Beta->GetConstTensor(); + + auto inputData = GetInputTensorDataFloat(0, m_Data); + auto outputData = GetOutputTensorDataFloat(0, m_Data); + + BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..15c843c2ca --- /dev/null +++ b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefBatchNormalizationFloat32Workload : public Float32Workload +{ +public: + explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Beta; + std::unique_ptr m_Gamma; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp new file mode 100644 index 0000000000..e248ad4b9d --- /dev/null +++ b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
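For reference (not part of the patch): BatchNormImpl, called by the workload above, folds y = gamma * (x - mean) / sqrt(var + eps) + beta into a single multiply-add per channel, with mult = gamma / sqrt(var + eps) and add = beta - mult * mean. A standalone sketch of that folding applied to one channel's data; this is a hypothetical free function, not the patch's template.

#include <cmath>
#include <cstddef>

// Applies the folded batch-norm transform y = mult * x + add to one channel.
void BatchNormChannel(const float* in, float* out, std::size_t count,
                      float mean, float var, float gamma, float beta, float eps)
{
    const float mult = gamma / std::sqrt(var + eps);
    const float add  = beta - mult * mean;
    for (std::size_t i = 0; i < count; ++i)
    {
        out[i] = mult * in[i] + add;
    }
}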
+// SPDX-License-Identifier: MIT +// + +#include "RefBatchNormalizationUint8Workload.hpp" + +#include "BatchNormImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ +RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Mean(std::make_unique(*(descriptor.m_Mean))), + m_Variance(std::make_unique(*(descriptor.m_Variance))), + m_Beta(std::make_unique(*(descriptor.m_Beta))), + m_Gamma(std::make_unique(*(descriptor.m_Gamma))) {} + +void RefBatchNormalizationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& varInfo = GetTensorInfo(m_Variance.get()); + const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get()); + const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get()); + const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get()); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); + auto var = Dequantize(m_Variance->GetConstTensor(), varInfo); + auto mean = Dequantize(m_Mean->GetConstTensor(), meanInfo); + auto gamma = Dequantize(m_Gamma->GetConstTensor(), gammaInfo); + auto beta = Dequantize(m_Beta->GetConstTensor(), betaInfo); + + std::vector results(outputInfo.GetNumElements()); + BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data()); + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp new file mode 100644 index 0000000000..d3e8e0a120 --- /dev/null +++ b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefBatchNormalizationUint8Workload : public Uint8Workload +{ +public: + explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Beta; + std::unique_ptr m_Gamma; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConstantFloat32Workload.cpp b/src/backends/reference/workloads/RefConstantFloat32Workload.cpp new file mode 100644 index 0000000000..074e8ccaae --- /dev/null +++ b/src/backends/reference/workloads/RefConstantFloat32Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefConstantFloat32Workload.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConstantFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantFloat32Workload_Execute"); + RefBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConstantFloat32Workload.hpp b/src/backends/reference/workloads/RefConstantFloat32Workload.hpp new file mode 100644 index 0000000000..76e3a42026 --- /dev/null +++ b/src/backends/reference/workloads/RefConstantFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefBaseConstantWorkload.hpp" + +namespace armnn +{ + +class RefConstantFloat32Workload : public RefBaseConstantWorkload +{ +public: + using RefBaseConstantWorkload::RefBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConstantUint8Workload.cpp b/src/backends/reference/workloads/RefConstantUint8Workload.cpp new file mode 100644 index 0000000000..07e4719d54 --- /dev/null +++ b/src/backends/reference/workloads/RefConstantUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefConstantUint8Workload.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantUint8Workload_Execute"); + RefBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConstantUint8Workload.hpp b/src/backends/reference/workloads/RefConstantUint8Workload.hpp new file mode 100644 index 0000000000..02552ac80b --- /dev/null +++ b/src/backends/reference/workloads/RefConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefBaseConstantWorkload.hpp" + +namespace armnn +{ + +class RefConstantUint8Workload : public RefBaseConstantWorkload +{ +public: + using RefBaseConstantWorkload::RefBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..e148bf6a9d --- /dev/null +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "RefWorkloadUtils.hpp" +#include "FloatingPointConverter.hpp" + +namespace armnn +{ + +void RefConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute"); + + const Half* const input = GetInputTensorDataHalf(0, m_Data); + float* const output = GetOutputTensorDataFloat(0, m_Data); + + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..5e841ba34f --- /dev/null +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + using Float16ToFloat32Workload::Float16ToFloat32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..efaaf8e1ad --- /dev/null +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute"); + + const float* const input = GetInputTensorDataFloat(0, m_Data); + Half* const output = GetOutputTensorDataHalf(0, m_Data); + + // convert Fp32 input to Fp16 output + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..0754fd5c79 --- /dev/null +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + using Float32ToFloat16Workload::Float32ToFloat16Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..20905646d7 --- /dev/null +++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefConvolution2dFloat32Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute"); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Weight->template GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Bias->template GetConstTensor() : nullptr; + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl( + m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..34489e807c --- /dev/null +++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvolution2dFloat32Workload : public Float32Workload +{ +public: + explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; + +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..881e9bf6b0 --- /dev/null +++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp @@ -0,0 +1,45 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefConvolution2dUint8Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefConvolution2dUint8Workload::RefConvolution2dUint8Workload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute"); + + const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const uint8_t* weightsData = m_Weight->template GetConstTensor(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
+ m_Bias->template GetConstTensor() : + nullptr; + uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl( + m_Data, + inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), + weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), + biasData, + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..0e2dd6aada --- /dev/null +++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvolution2dUint8Workload : public Uint8Workload +{ +public: + explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; + +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..e89013b9bd --- /dev/null +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefDepthwiseConvolution2dFloat32Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefDepthwiseConvolution2dFloat32Workload::RefDepthwiseConvolution2dFloat32Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefDepthwiseConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute"); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Weight->template GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Bias->template GetConstTensor() : nullptr; + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl + (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..8f1227e2de --- /dev/null +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
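For illustration (not part of the patch): in the uint8 convolutions above, ConvImpl accumulates in int32 and then requantizes with the multiplier (inputScale * filterScale) / outputScale, which QuantizedMultiplierSmallerThanOne represents as a Q31 fixed-point value plus a right shift. A standalone sketch of that decomposition compared against plain floating-point math; this simplified version truncates where the patch rounds and saturates.

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    // Example requantization multiplier, (inputScale * filterScale) / outputScale, in (0, 1).
    const float multiplier = 0.3578f;

    // Decompose multiplier = q * 2^exponent with q in [0.5, 1), then store q as Q31 fixed point.
    int exponent = 0;
    const double q = std::frexp(multiplier, &exponent);
    const int rightShift = -exponent;
    const int64_t q31 = static_cast<int64_t>(std::round(q * (1ll << 31)));

    const int32_t accumulator = 123456;   // an int32 convolution sum

    // Fixed-point approximation of accumulator * multiplier: multiply by q, then shift right.
    const int64_t scaled = (static_cast<int64_t>(accumulator) * q31) >> 31;
    const int32_t fixedPoint = static_cast<int32_t>(scaled >> rightShift);

    std::printf("float: %.2f  fixed point: %d\n",
                static_cast<double>(accumulator) * multiplier, fixedPoint);
}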
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload +{ +public: + explicit RefDepthwiseConvolution2dFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..e8e501d6ae --- /dev/null +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefDepthwiseConvolution2dUint8Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +RefDepthwiseConvolution2dUint8Workload::RefDepthwiseConvolution2dUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefDepthwiseConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute"); + + const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const uint8_t* weightsData = m_Weight->template GetConstTensor(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Bias->template GetConstTensor() : + nullptr; + uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl( + m_Data, + inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), + weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), + biasData, + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..c615cf7880 --- /dev/null +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload +{ +public: + explicit RefDepthwiseConvolution2dUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp new file mode 100644 index 0000000000..3e16f60b11 --- /dev/null +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefFakeQuantizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void FakeQuantization(const float* inputData, float* outputData, uint32_t numElements, float min, float max) +{ + float scale = (max - min) / 255.f; + int32_t offset = boost::numeric_cast((-min * 255.f) / (max - min)); + + for (uint32_t i = 0; i < numElements; i++) + { + outputData[i] = static_cast(armnn::Quantize(inputData[i], scale, offset)); + } + +} + +void RefFakeQuantizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + const float* inputData = GetInputTensorDataFloat(0, m_Data); + float* outputData = GetOutputTensorDataFloat(0, m_Data); + FakeQuantization(inputData, outputData, inputInfo.GetNumElements(), + m_Data.m_Parameters.m_Min, + m_Data.m_Parameters.m_Max); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp new file mode 100644 index 0000000000..523fdcff50 --- /dev/null +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFakeQuantizationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFloorFloat32Workload.cpp b/src/backends/reference/workloads/RefFloorFloat32Workload.cpp new file mode 100644 index 0000000000..cc1f8800dc --- /dev/null +++ b/src/backends/reference/workloads/RefFloorFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
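A standalone numeric check (not part of the patch): FakeQuantization derives an 8-bit scale and zero point directly from the requested [min, max] range, scale = (max - min) / 255 and offset = -min / scale, and then maps each float to its quantized level. Rounding and clamping details are simplified here relative to armnn::Quantize.

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    // Quantization range requested by the descriptor, e.g. [-1, 1].
    const float min = -1.0f, max = 1.0f;

    const float   scale  = (max - min) / 255.0f;                                // ~0.00784
    const int32_t offset = static_cast<int32_t>((-min * 255.0f) / (max - min)); // 127 here

    // Map a value to its quantized level, which is what the workload writes out as a float.
    const float x = 0.3f;
    const float level = std::round(x / scale) + static_cast<float>(offset);     // 165 here

    std::printf("scale=%.5f offset=%d level(%.2f)=%.0f\n", scale, offset, x, level);
}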
+// SPDX-License-Identifier: MIT +// + +#include "RefFloorFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefFloorFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute"); + + const float* const input = GetInputTensorDataFloat(0, m_Data); + float* const output = GetOutputTensorDataFloat(0, m_Data); + + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + for (unsigned int i = 0; i < numElements; ++i) + { + output[i] = floorf(input[i]); + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFloorFloat32Workload.hpp b/src/backends/reference/workloads/RefFloorFloat32Workload.hpp new file mode 100644 index 0000000000..d7cfa50365 --- /dev/null +++ b/src/backends/reference/workloads/RefFloorFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFloorFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp new file mode 100644 index 0000000000..ccaf4cd87b --- /dev/null +++ b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefFullyConnectedFloat32Workload.hpp" + +#include "FullyConnected.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefFullyConnectedFloat32Workload::RefFullyConnectedFloat32Workload( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefFullyConnectedFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Weight->GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->GetConstTensor() : nullptr; + + FullyConnected(inputData, + outputData, + inputInfo, + outputInfo, + weightData, + biasData, + m_Data.m_Parameters.m_TransposeWeightMatrix); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp new file mode 100644 index 0000000000..ce058690ac --- /dev/null +++ b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFullyConnectedFloat32Workload : public Float32Workload +{ +public: + explicit RefFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp new file mode 100644 index 0000000000..cd785d786c --- /dev/null +++ b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefFullyConnectedUint8Workload.hpp" + +#include "FullyConnected.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ +RefFullyConnectedUint8Workload::RefFullyConnectedUint8Workload( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefFullyConnectedUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + const uint8_t* weightData = m_Weight->GetConstTensor(); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + auto weight = Dequantize(weightData, m_Weight->GetTensorInfo()); + + std::vector results(outputInfo.GetNumElements()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + const int32_t* biasData = m_Bias->GetConstTensor(); + auto bias = Dequantize(biasData, m_Bias->GetTensorInfo()); + + FullyConnected(dequant.data(), + results.data(), + inputInfo, + outputInfo, + weight.data(), + bias.data(), + m_Data.m_Parameters.m_TransposeWeightMatrix); + } + else + { + FullyConnected(dequant.data(), + results.data(), + inputInfo, + outputInfo, + weight.data(), + nullptr, + m_Data.m_Parameters.m_TransposeWeightMatrix); + } + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp new file mode 100644 index 0000000000..e489cc7d81 --- /dev/null +++ b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
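RefFullyConnectedUint8Workload above follows the pattern shared by the uint8 reference workloads: dequantize the inputs and constants to float, run the float implementation, then requantize the result. A generic sketch of that wrapper, with floatKernel standing in for any of the float reference functions:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

struct QuantParamsSketch { float scale; int32_t offset; };

std::vector<uint8_t> RunViaFloatSketch(const std::vector<uint8_t>& input,
                                       QuantParamsSketch inQ, QuantParamsSketch outQ,
                                       const std::function<std::vector<float>(const std::vector<float>&)>& floatKernel)
{
    // Dequantize: real = scale * (q - offset).
    std::vector<float> dequant(input.size());
    for (std::size_t i = 0; i < input.size(); ++i)
    {
        dequant[i] = inQ.scale * static_cast<float>(static_cast<int32_t>(input[i]) - inQ.offset);
    }

    // Run the computation in float.
    const std::vector<float> results = floatKernel(dequant);

    // Requantize: q = clamp(round(real / scale) + offset, 0, 255).
    std::vector<uint8_t> out(results.size());
    for (std::size_t i = 0; i < results.size(); ++i)
    {
        const int32_t q = static_cast<int32_t>(std::round(results[i] / outQ.scale)) + outQ.offset;
        out[i] = static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }
    return out;
}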
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFullyConnectedUint8Workload : public Uint8Workload +{ +public: + explicit RefFullyConnectedUint8Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..973c87b009 --- /dev/null +++ b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.cpp @@ -0,0 +1,61 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefL2NormalizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "TensorBufferArrayView.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefL2NormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + TensorBufferArrayView input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data)); + TensorBufferArrayView output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data)); + + const unsigned int batchSize = inputInfo.GetShape()[0]; + const unsigned int depth = inputInfo.GetShape()[1]; + const unsigned int rows = inputInfo.GetShape()[2]; + const unsigned int cols = inputInfo.GetShape()[3]; + + for (unsigned int n = 0; n < batchSize; ++n) + { + for (unsigned int d = 0; d < depth; ++d) + { + for (unsigned int h = 0; h < rows; ++h) + { + for (unsigned int w = 0; w < cols; ++w) + { + float reduction = 0.0; + for (unsigned int c = 0; c < depth; ++c) + { + const float value = input.Get(n, c, h, w); + reduction += value * value; + } + + // Using std::max(reduction, epsilon) below would prevent against division by 0. + // However, at the time of writing: + // - This is not supported by the ACL functions used to implement L2Normalization in the CL + // backend. + // - The reference semantics for this operator do not include this parameter. + const float scale = 1.0f / sqrtf(reduction); + output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..a3f03f3060 --- /dev/null +++ b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
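The loop above divides each NCHW element by the L2 norm taken across the channel dimension at the same batch and spatial position. The same arithmetic on a flat NCHW buffer, as a compact sketch:

#include <cmath>
#include <cstddef>

// output(n,c,h,w) = input(n,c,h,w) / sqrt(sum_k input(n,k,h,w)^2)
void L2NormalizeAcrossChannelsSketch(const float* in, float* out,
                                     std::size_t N, std::size_t C, std::size_t H, std::size_t W)
{
    auto idx = [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w)
    { return ((n * C + c) * H + h) * W + w; };

    for (std::size_t n = 0; n < N; ++n)
    {
        for (std::size_t h = 0; h < H; ++h)
        {
            for (std::size_t w = 0; w < W; ++w)
            {
                float sumSq = 0.0f;
                for (std::size_t c = 0; c < C; ++c)
                {
                    const float v = in[idx(n, c, h, w)];
                    sumSq += v * v;
                }
                // No epsilon term, matching the reference semantics noted in the comment above.
                const float scale = 1.0f / std::sqrt(sumSq);
                for (std::size_t c = 0; c < C; ++c)
                {
                    out[idx(n, c, h, w)] = in[idx(n, c, h, w)] * scale;
                }
            }
        }
    }
}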
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefL2NormalizationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.cpp b/src/backends/reference/workloads/RefLstmFloat32Workload.cpp new file mode 100644 index 0000000000..50ff605701 --- /dev/null +++ b/src/backends/reference/workloads/RefLstmFloat32Workload.cpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefLstmFloat32Workload.hpp" + +namespace armnn +{ + +void RefLstmFloat32Workload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Ref backend!"); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.hpp b/src/backends/reference/workloads/RefLstmFloat32Workload.hpp new file mode 100644 index 0000000000..fc4f7776c6 --- /dev/null +++ b/src/backends/reference/workloads/RefLstmFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefLstmFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefMergerFloat32Workload.cpp b/src/backends/reference/workloads/RefMergerFloat32Workload.cpp new file mode 100644 index 0000000000..b1f8a32ee7 --- /dev/null +++ b/src/backends/reference/workloads/RefMergerFloat32Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefMergerFloat32Workload.hpp" + +#include "Merger.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMergerFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerFloat32Workload_Execute"); + Merger(m_Data); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefMergerFloat32Workload.hpp b/src/backends/reference/workloads/RefMergerFloat32Workload.hpp new file mode 100644 index 0000000000..23a523c852 --- /dev/null +++ b/src/backends/reference/workloads/RefMergerFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMergerFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefMergerUint8Workload.cpp b/src/backends/reference/workloads/RefMergerUint8Workload.cpp new file mode 100644 index 0000000000..47ce1cf731 --- /dev/null +++ b/src/backends/reference/workloads/RefMergerUint8Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefMergerUint8Workload.hpp" + +#include "Merger.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerUint8Workload_Execute"); + Merger(m_Data); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefMergerUint8Workload.hpp b/src/backends/reference/workloads/RefMergerUint8Workload.hpp new file mode 100644 index 0000000000..65dc42120a --- /dev/null +++ b/src/backends/reference/workloads/RefMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMergerUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..5c24416624 --- /dev/null +++ b/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp @@ -0,0 +1,185 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefNormalizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +#include +#include + +namespace armnn +{ + +// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization. +static void NormalizeWithinUingLbr(const float* inputData, + float* outputData, + const TensorShape& tensorShape, + uint32_t norm_size, + float alpha, + float beta, + float kappa) +{ + const unsigned int batchSize = tensorShape[0]; + const unsigned int depth = tensorShape[1]; + const unsigned int rows = tensorShape[2]; + const unsigned int cols = tensorShape[3]; + + int radius = boost::numeric_cast(norm_size / 2u); /* Strong Assumption on rounding Mode */ + + for (unsigned int n = 0; n < batchSize; n++) + { + for (unsigned int c = 0; c < depth; c++) + { + for (unsigned int h = 0; h < rows; h++) + { + for (unsigned int w = 0; w < cols; w++) + { + float accumulated_scale = 0.0; + for (int y = -radius; y <= radius; y++) + { + for (int x = -radius; x <= radius; x++) + { + int i = boost::numeric_cast(w) + x; + int j = boost::numeric_cast(h) + y; + + if ((i < 0) || (i >= boost::numeric_cast(cols))) + { + continue; + } + + if ((j < 0) || (j >= boost::numeric_cast(rows))) + { + continue; + } + + float inval = inputData[n * cols * rows * depth + + c * cols * rows + + boost::numeric_cast(j) * cols + + boost::numeric_cast(i)]; + + accumulated_scale += inval*inval; + } + } + outputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] = inputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] / (powf((kappa + (accumulated_scale * alpha)), beta)); + } + } + } + } +} + +// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization. 
+void NormalizeAcrossUingLbr(const float* inputData, + float* outputData, + const TensorShape& tensorShape, + uint32_t norm_size, + float alpha, + float beta, + float kappa) +{ + const unsigned int batchSize = tensorShape[0]; + const unsigned int depth = tensorShape[1]; + const unsigned int rows = tensorShape[2]; + const unsigned int cols = tensorShape[3]; + + int radius = boost::numeric_cast(norm_size / 2u); /* Strong Assumption on rounding Mode */ + + for (unsigned int n = 0; n < batchSize; n++) + { + for (unsigned int c = 0; c < depth; c++) + { + for (unsigned int h = 0; h < rows; h++) + { + for (unsigned int w = 0; w < cols; w++) + { + float accumulated_scale = 0.0; + for (int z = -radius; z <= radius; z++) + { + int k = boost::numeric_cast(c) + z; + + if ((k < 0) || (k >= boost::numeric_cast(depth))) + { + continue; + } + + float inval = inputData[n * cols * rows * depth + + boost::numeric_cast(k) * cols * rows + + h * cols + + w]; + + accumulated_scale += inval*inval; + } + float scale = kappa + (accumulated_scale * alpha); + scale = powf(scale, -beta); + outputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] = scale * + inputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w]; + } + } + } + } +} + +void RefNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + + + if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType) + { + if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType) + { + NormalizeWithinUingLbr(inputData, + outputData, + inputInfo.GetShape(), + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K); + } + else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType) + { + NormalizeAcrossUingLbr(inputData, + outputData, + inputInfo.GetShape(), + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K); + } + else + { + BOOST_LOG_TRIVIAL(warning) << "Illegal NORMALIZATION mode in normalization_f32"; + return; + } + } + else + { + BOOST_LOG_TRIVIAL(warning) << "Lcr method (Jarret 2009: Local Contrast Normalization) not supported yet."; + return; + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..e30356c422 --- /dev/null +++ b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
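Both helpers above implement the local response normalization of Krizhevsky et al. 2012, out = in / (kappa + alpha * S)^beta, where S sums the squares over a neighbourhood: a spatial window inside the channel for the Within variant, a window along the channel axis for the Across variant. A condensed sketch of the Across variant on a flat NCHW buffer:

#include <cmath>
#include <cstddef>

void LrnAcrossChannelsSketch(const float* in, float* out,
                             std::size_t N, std::size_t C, std::size_t H, std::size_t W,
                             std::size_t normSize, float alpha, float beta, float kappa)
{
    const std::ptrdiff_t radius = static_cast<std::ptrdiff_t>(normSize / 2);
    auto idx = [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w)
    { return ((n * C + c) * H + h) * W + w; };

    for (std::size_t n = 0; n < N; ++n)
    for (std::size_t c = 0; c < C; ++c)
    for (std::size_t h = 0; h < H; ++h)
    for (std::size_t w = 0; w < W; ++w)
    {
        float sumSq = 0.0f;
        for (std::ptrdiff_t z = -radius; z <= radius; ++z)
        {
            const std::ptrdiff_t k = static_cast<std::ptrdiff_t>(c) + z;
            if (k < 0 || k >= static_cast<std::ptrdiff_t>(C))
            {
                continue; // neighbours outside the channel range are skipped, as above
            }
            const float v = in[idx(n, static_cast<std::size_t>(k), h, w)];
            sumSq += v * v;
        }
        out[idx(n, c, h, w)] = in[idx(n, c, h, w)] * std::pow(kappa + alpha * sumSq, -beta);
    }
}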
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefNormalizationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp new file mode 100644 index 0000000000..4093ff38f4 --- /dev/null +++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefPermuteWorkload.hpp" +#include "RefWorkloadUtils.hpp" + +#include +#include "TypeUtils.hpp" + +namespace armnn +{ + +template +void RefPermuteWorkload::Execute() const +{ + using T = ResolveType; + + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); + + const ITensorHandle* src = m_Data.m_Inputs[0]; + const ITensorHandle* dst = m_Data.m_Outputs[0]; + const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData(src), GetCpuData(dst)); +} + +template class RefPermuteWorkload; +template class RefPermuteWorkload; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp new file mode 100644 index 0000000000..d72cf77e74 --- /dev/null +++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +template +class RefPermuteWorkload : public TypedWorkload +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("RefPermute") + GetDataTypeName(DataType) + "Workload"; + return name; + } + + using TypedWorkload::m_Data; + using TypedWorkload::TypedWorkload; + void Execute() const override; +}; + +using RefPermuteFloat32Workload = RefPermuteWorkload; +using RefPermuteUint8Workload = RefPermuteWorkload; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp b/src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp new file mode 100644 index 0000000000..2542756c26 --- /dev/null +++ b/src/backends/reference/workloads/RefPooling2dFloat32Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
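RefPermuteWorkload above delegates to armnnUtils::Permute with the descriptor's m_DimMappings. An illustrative sketch of what such a permutation does, assuming the source-to-destination convention (mappings[i] is the output dimension that input dimension i moves to):

#include <array>
#include <cstddef>

void Permute4DSketch(const std::array<std::size_t, 4>& srcShape,
                     const std::array<std::size_t, 4>& mappings,
                     const float* src, float* dst)
{
    // Shape of the destination tensor: source dimension i becomes destination dimension mappings[i].
    std::array<std::size_t, 4> dstShape{};
    for (std::size_t i = 0; i < 4; ++i) { dstShape[mappings[i]] = srcShape[i]; }

    // Row-major strides of the destination tensor.
    std::array<std::size_t, 4> dstStride{};
    dstStride[3] = 1;
    for (std::size_t i = 3; i-- > 0;) { dstStride[i] = dstStride[i + 1] * dstShape[i + 1]; }

    // Walk the source in row-major order and scatter each element into the destination.
    std::size_t srcIndex = 0;
    std::array<std::size_t, 4> p{};
    for (p[0] = 0; p[0] < srcShape[0]; ++p[0])
    for (p[1] = 0; p[1] < srcShape[1]; ++p[1])
    for (p[2] = 0; p[2] < srcShape[2]; ++p[2])
    for (p[3] = 0; p[3] < srcShape[3]; ++p[3])
    {
        std::size_t dstIndex = 0;
        for (std::size_t i = 0; i < 4; ++i) { dstIndex += p[i] * dstStride[mappings[i]]; }
        dst[dstIndex] = src[srcIndex++];
    }
}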
+// SPDX-License-Identifier: MIT +// + +#include "RefPooling2dFloat32Workload.hpp" + +#include "Pooling2d.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefPooling2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dFloat32Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(m_Data.m_Outputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + + Pooling2d(inputData, + outputData, + inputInfo0, + outputInfo0, + m_Data.m_Parameters); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp b/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp new file mode 100644 index 0000000000..501fb71aff --- /dev/null +++ b/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefPooling2dFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefPooling2dUint8Workload.cpp b/src/backends/reference/workloads/RefPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..91fdf291ee --- /dev/null +++ b/src/backends/reference/workloads/RefPooling2dUint8Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefPooling2dUint8Workload.hpp" + +#include "Pooling2d.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + std::vector results(outputInfo.GetNumElements()); + Pooling2d(dequant.data(), + results.data(), + inputInfo, + outputInfo, + m_Data.m_Parameters); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp b/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..6544f9a785 --- /dev/null +++ b/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
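Pooling2d itself is defined in Pooling2d.cpp, earlier in this patch, and handles max, average and L2 pooling with padding and stride parameters. A much-simplified illustration of the max-pooling case on NCHW data, with a square window and no padding:

#include <algorithm>
#include <cstddef>
#include <limits>

void MaxPool2dSketch(const float* in, float* out,
                     std::size_t N, std::size_t C, std::size_t inH, std::size_t inW,
                     std::size_t pool, std::size_t stride)
{
    const std::size_t outH = (inH - pool) / stride + 1;
    const std::size_t outW = (inW - pool) / stride + 1;
    auto inIdx  = [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w)
    { return ((n * C + c) * inH + h) * inW + w; };
    auto outIdx = [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w)
    { return ((n * C + c) * outH + h) * outW + w; };

    for (std::size_t n = 0; n < N; ++n)
    for (std::size_t c = 0; c < C; ++c)
    for (std::size_t oh = 0; oh < outH; ++oh)
    for (std::size_t ow = 0; ow < outW; ++ow)
    {
        float best = std::numeric_limits<float>::lowest();
        for (std::size_t kh = 0; kh < pool; ++kh)
        {
            for (std::size_t kw = 0; kw < pool; ++kw)
            {
                best = std::max(best, in[inIdx(n, c, oh * stride + kh, ow * stride + kw)]);
            }
        }
        out[outIdx(n, c, oh, ow)] = best;
    }
}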
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefPooling2dUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefReshapeFloat32Workload.cpp b/src/backends/reference/workloads/RefReshapeFloat32Workload.cpp new file mode 100644 index 0000000000..99c94a49a1 --- /dev/null +++ b/src/backends/reference/workloads/RefReshapeFloat32Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefReshapeFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefReshapeFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeFloat32Workload_Execute"); + + void* output = GetOutputTensorData(0, m_Data); + const void* input = GetInputTensorData(0, m_Data); + unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + memcpy(output, input, numBytes); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp b/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp new file mode 100644 index 0000000000..9281e89cf7 --- /dev/null +++ b/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefReshapeFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefReshapeUint8Workload.cpp b/src/backends/reference/workloads/RefReshapeUint8Workload.cpp new file mode 100644 index 0000000000..8f475f3db3 --- /dev/null +++ b/src/backends/reference/workloads/RefReshapeUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefReshapeUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeUint8Workload_Execute"); + + void* output = GetOutputTensorData(0, m_Data); + const void* input = GetInputTensorData(0, m_Data); + unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + memcpy(output, input, numBytes); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefReshapeUint8Workload.hpp b/src/backends/reference/workloads/RefReshapeUint8Workload.hpp new file mode 100644 index 0000000000..b37fb4bdeb --- /dev/null +++ b/src/backends/reference/workloads/RefReshapeUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefReshapeUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp new file mode 100644 index 0000000000..50ee7a218a --- /dev/null +++ b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefResizeBilinearFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "ResizeBilinear.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefResizeBilinearFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + ResizeBilinear(GetInputTensorDataFloat(0, m_Data), + inputInfo, + GetOutputTensorDataFloat(0, m_Data), + outputInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp new file mode 100644 index 0000000000..0fff7ee695 --- /dev/null +++ b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefResizeBilinearFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp new file mode 100644 index 0000000000..67ab039ef3 --- /dev/null +++ b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefResizeBilinearUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "ResizeBilinear.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefResizeBilinearUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + std::vector results(outputInfo.GetNumElements()); + ResizeBilinear(dequant.data(), inputInfo, results.data(), outputInfo); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp new file mode 100644 index 0000000000..bbaf899ca6 --- /dev/null +++ b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefResizeBilinearUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp new file mode 100644 index 0000000000..1f519bda10 --- /dev/null +++ b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSoftmaxFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "Softmax.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSoftmaxFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxFloat32Workload_Execute"); + + Softmax(GetInputTensorDataFloat(0, m_Data), + GetOutputTensorDataFloat(0, m_Data), + GetTensorInfo(m_Data.m_Inputs[0]), + m_Data.m_Parameters.m_Beta); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp new file mode 100644 index 0000000000..d37f2b5990 --- /dev/null +++ b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSoftmaxFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp b/src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..17114ec83a --- /dev/null +++ b/src/backends/reference/workloads/RefSoftmaxUint8Workload.cpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSoftmaxUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "Softmax.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxUint8Workload_Execute"); + + const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); + + std::vector results(tensorInfo.GetNumElements()); + + Softmax(dequant.data(), + results.data(), + tensorInfo, + m_Data.m_Parameters.m_Beta); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp b/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..b179d529da --- /dev/null +++ b/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSoftmaxUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSplitterFloat32Workload.cpp b/src/backends/reference/workloads/RefSplitterFloat32Workload.cpp new file mode 100644 index 0000000000..75611dacf3 --- /dev/null +++ b/src/backends/reference/workloads/RefSplitterFloat32Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSplitterFloat32Workload.hpp" + +#include "Splitter.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSplitterFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterFloat32Workload_Execute"); + Splitter(m_Data); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp b/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp new file mode 100644 index 0000000000..12176dd277 --- /dev/null +++ b/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSplitterFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSplitterUint8Workload.cpp b/src/backends/reference/workloads/RefSplitterUint8Workload.cpp new file mode 100644 index 0000000000..ca9f5db850 --- /dev/null +++ b/src/backends/reference/workloads/RefSplitterUint8Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSplitterUint8Workload.hpp" + +#include "Splitter.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterUint8Workload_Execute"); + Splitter(m_Data); +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefSplitterUint8Workload.hpp b/src/backends/reference/workloads/RefSplitterUint8Workload.hpp new file mode 100644 index 0000000000..e80cb1a654 --- /dev/null +++ b/src/backends/reference/workloads/RefSplitterUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSplitterUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp new file mode 100644 index 0000000000..616a875028 --- /dev/null +++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp @@ -0,0 +1,138 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/CpuTensorHandle.hpp" + +#include +#include +#include + +#include + +namespace armnn +{ + +//////////////////////////////////////////// +/// float32 helpers +//////////////////////////////////////////// + +inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetTensorInfo(); +} + +template +inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetConstTensor(); +} + +template +inline DataType* GetCpuData(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. + const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetTensor(); +}; + +template +const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Inputs[idx]; + return GetConstCpuData(tensorHandle); +} + +template +DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return GetCpuData(tensorHandle); +} + +template +const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data) +{ + return GetInputTensorData(idx, data); +} + +template +float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data) +{ + return GetOutputTensorData(idx, data); +} + +template +const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data) +{ + return GetInputTensorData(idx, data); +} + +template +Half* GetOutputTensorDataHalf(unsigned int idx, const PayloadType& data) +{ + return GetOutputTensorData(idx, data); +} + +//////////////////////////////////////////// +/// u8 helpers +//////////////////////////////////////////// + +inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetConstTensor(); +}; + +inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. 
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetTensor(); +}; + +template +const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Inputs[idx]; + return GetConstCpuU8Data(tensorHandle); +} + +template +uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return GetCpuU8Data(tensorHandle); +} + +template +std::vector Dequantize(const T* quant, const TensorInfo& info) +{ + std::vector ret(info.GetNumElements()); + for (size_t i = 0; i < info.GetNumElements(); i++) + { + ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } + return ret; +} + +inline void Quantize(uint8_t* quant, const float* dequant, const TensorInfo& info) +{ + for (size_t i = 0; i < info.GetNumElements(); i++) + { + quant[i] = armnn::Quantize(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp new file mode 100644 index 0000000000..e5c6e1e9d5 --- /dev/null +++ b/src/backends/reference/workloads/RefWorkloads.hpp @@ -0,0 +1,53 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefConstantUint8Workload.hpp" +#include "ArithmeticFunction.hpp" +#include "RefArithmeticWorkload.hpp" +#include "ConvImpl.hpp" +#include "RefBaseConstantWorkload.hpp" +#include "RefConvolution2dUint8Workload.hpp" +#include "RefSplitterUint8Workload.hpp" +#include "RefResizeBilinearUint8Workload.hpp" +#include "RefL2NormalizationFloat32Workload.hpp" +#include "RefActivationUint8Workload.hpp" +#include "RefPooling2dFloat32Workload.hpp" +#include "RefWorkloadUtils.hpp" +#include "RefMergerUint8Workload.hpp" +#include "RefFullyConnectedFloat32Workload.hpp" +#include "Softmax.hpp" +#include "RefMergerFloat32Workload.hpp" +#include "TensorBufferArrayView.hpp" +#include "RefBatchNormalizationFloat32Workload.hpp" +#include "Splitter.hpp" +#include "RefFullyConnectedUint8Workload.hpp" +#include "RefReshapeFloat32Workload.hpp" +#include "RefDepthwiseConvolution2dUint8Workload.hpp" +#include "FullyConnected.hpp" +#include "RefFloorFloat32Workload.hpp" +#include "RefSoftmaxFloat32Workload.hpp" +#include "RefSoftmaxUint8Workload.hpp" +#include "RefReshapeUint8Workload.hpp" +#include "RefResizeBilinearFloat32Workload.hpp" +#include "RefBatchNormalizationUint8Workload.hpp" +#include "ResizeBilinear.hpp" +#include "RefNormalizationFloat32Workload.hpp" +#include "RefDepthwiseConvolution2dFloat32Workload.hpp" +#include "RefPooling2dUint8Workload.hpp" +#include "BatchNormImpl.hpp" +#include "Activation.hpp" +#include "Merger.hpp" +#include "RefSplitterFloat32Workload.hpp" +#include "RefConstantFloat32Workload.hpp" +#include "RefActivationFloat32Workload.hpp" +#include "RefConvolution2dFloat32Workload.hpp" +#include "Pooling2d.hpp" +#include "RefFakeQuantizationFloat32Workload.hpp" +#include "RefPermuteWorkload.hpp" +#include "RefLstmFloat32Workload.hpp" +#include "RefConvertFp16ToFp32Workload.hpp" +#include "RefConvertFp32ToFp16Workload.hpp" diff --git a/src/backends/reference/workloads/ResizeBilinear.cpp b/src/backends/reference/workloads/ResizeBilinear.cpp new file mode 100644 index 0000000000..0bce3c7ed8 --- /dev/null +++ 
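The Dequantize and Quantize helpers above defer to armnn::Dequantize and armnn::Quantize, which apply the usual affine mapping between uint8 and float. A self-contained sketch of that mapping (ArmNN's exact rounding and clamping may differ in detail):

#include <algorithm>
#include <cmath>
#include <cstdint>

// real = scale * (q - offset)
inline float DequantizeSketch(uint8_t q, float scale, int32_t offset)
{
    return scale * static_cast<float>(static_cast<int32_t>(q) - offset);
}

// q = clamp(round(real / scale) + offset, 0, 255)
inline uint8_t QuantizeSketch(float real, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::round(real / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}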
b/src/backends/reference/workloads/ResizeBilinear.cpp @@ -0,0 +1,92 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ResizeBilinear.hpp" + +#include "TensorBufferArrayView.hpp" + +#include + +#include +#include + +namespace armnn +{ + +namespace +{ + +inline float Lerp(float a, float b, float w) +{ + return w * b + (1.f - w) * a; +} + +} + +void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo) +{ + // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output + // image is projected into the input image to figure out the interpolants and weights. Note that this + // will yield different results than if projecting the centre of output texels. + + const unsigned int batchSize = inputInfo.GetShape()[0]; + const unsigned int channelCount = inputInfo.GetShape()[1]; + + const unsigned int inputHeight = inputInfo.GetShape()[2]; + const unsigned int inputWidth = inputInfo.GetShape()[3]; + const unsigned int outputHeight = outputInfo.GetShape()[2]; + const unsigned int outputWidth = outputInfo.GetShape()[3]; + + // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates + // in the input image. + const float scaleY = boost::numeric_cast(inputHeight) / boost::numeric_cast(outputHeight); + const float scaleX = boost::numeric_cast(inputWidth) / boost::numeric_cast(outputWidth); + + TensorBufferArrayView input(inputInfo.GetShape(), in); + TensorBufferArrayView output(outputInfo.GetShape(), out); + + for (unsigned int n = 0; n < batchSize; ++n) + { + for (unsigned int c = 0; c < channelCount; ++c) + { + for (unsigned int y = 0; y < outputHeight; ++y) + { + // Corresponding real-valued height coordinate in input image. + const float iy = boost::numeric_cast(y) * scaleY; + + // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation). + const float fiy = floorf(iy); + const unsigned int y0 = boost::numeric_cast(fiy); + + // Interpolation weight (range [0,1]). + const float yw = iy - fiy; + + for (unsigned int x = 0; x < outputWidth; ++x) + { + // Real-valued and discrete width coordinates in input image. + const float ix = boost::numeric_cast(x) * scaleX; + const float fix = floorf(ix); + const unsigned int x0 = boost::numeric_cast(fix); + + // Interpolation weight (range [0,1]). + const float xw = ix - fix; + + // Discrete width/height coordinates of texels below and to the right of (x0, y0). + const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u); + const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u); + + // Interpolation + const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0. + const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1. + const float l = Lerp(ly0, ly1, yw); + + output.Get(n, c, y, x) = l; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/ResizeBilinear.hpp b/src/backends/reference/workloads/ResizeBilinear.hpp new file mode 100644 index 0000000000..847b8e8bef --- /dev/null +++ b/src/backends/reference/workloads/ResizeBilinear.hpp @@ -0,0 +1,15 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
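Expanding the two Lerp calls used in ResizeBilinear above gives the standard bilinear weights; the small helper below shows the equivalence:

// out = (1-yw)*(1-xw)*v00 + (1-yw)*xw*v01 + yw*(1-xw)*v10 + yw*xw*v11
inline float BilinearSketch(float v00, float v01, float v10, float v11, float xw, float yw)
{
    return (1.0f - yw) * ((1.0f - xw) * v00 + xw * v01)
         + yw          * ((1.0f - xw) * v10 + xw * v11);
}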
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo); + +} //namespace armnn diff --git a/src/backends/reference/workloads/Softmax.cpp b/src/backends/reference/workloads/Softmax.cpp new file mode 100644 index 0000000000..4f1016e86c --- /dev/null +++ b/src/backends/reference/workloads/Softmax.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Softmax.hpp" + +#include +#include + +namespace armnn +{ + +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. +void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta) +{ + unsigned int numChannels = tensorInfo.GetShape()[1]; + for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++) + { + // Find maximum channel. + float max = in[n * numChannels]; + for (unsigned int c = 1; c < numChannels; c++) + { + float val = in[n * numChannels + c]; + if (val > max) + { + max = val; + } + } + + // Exponentiate all values and sum. + std::vector exponentials(numChannels); + float sum = 0.0f; + for (unsigned int c = 0; c < numChannels; c++) + { + float val = in[n * numChannels + c]; + exponentials[c] = expf((val - max) * beta); + sum += exponentials[c]; + } + + // Divide exponentials by sum to give outputs. + for (unsigned int c = 0; c < numChannels; c++) + { + out[n * numChannels + c] = exponentials[c] / sum; + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/Softmax.hpp b/src/backends/reference/workloads/Softmax.hpp new file mode 100644 index 0000000000..3b974f9e9e --- /dev/null +++ b/src/backends/reference/workloads/Softmax.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. +void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta); + +} //namespace armnn diff --git a/src/backends/reference/workloads/Splitter.hpp b/src/backends/reference/workloads/Splitter.hpp new file mode 100644 index 0000000000..e9c0379c9e --- /dev/null +++ b/src/backends/reference/workloads/Splitter.hpp @@ -0,0 +1,84 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include "backends/WorkloadData.hpp" + +#include + +#include + +namespace armnn +{ + +template +void Splitter(const SplitterQueueDescriptor& data) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + + for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index) + { + unsigned int indices[MaxNumOfTensorDimensions] = { 0 }; + + unsigned int indexRemainder = index; + unsigned int dimensionStride = inputInfo0.GetNumElements(); + + for (unsigned int i = 0; i= view.m_Origin[i] + outputInfo.GetShape()[i]) + { + insideView = false; + } + } + + if (insideView) + { + unsigned int outIndex = 0; + unsigned int dimensionStride = 1; + + for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;) + { + outIndex += dimensionStride * (indices[i] - view.m_Origin[i]); + dimensionStride *= outputInfo.GetShape()[i]; + } + + //We are within the view, to copy input data to the output corresponding to this view. 
+ DataType* outputData = GetOutputTensorData(viewIdx, data); + BOOST_ASSERT(outputData); + + const DataType* inputData = GetInputTensorData(0, data); + BOOST_ASSERT(inputData); + + outputData[outIndex] = inputData[index]; + } + } + } +} + +} //namespace armnn diff --git a/src/backends/reference/workloads/TensorBufferArrayView.hpp b/src/backends/reference/workloads/TensorBufferArrayView.hpp new file mode 100644 index 0000000000..e19810ca87 --- /dev/null +++ b/src/backends/reference/workloads/TensorBufferArrayView.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +namespace armnn +{ + +// Utility class providing access to raw tensor memory based on indices along each dimension. +template +class TensorBufferArrayView +{ +public: + TensorBufferArrayView(const TensorShape& shape, DataType* data) + : m_Shape(shape) + , m_Data(data) + { + } + + DataType& Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const + { + BOOST_ASSERT( b < m_Shape[0] || (m_Shape[0] == 0 && b == 0) ); + BOOST_ASSERT( c < m_Shape[1] || (m_Shape[1] == 0 && c == 0) ); + BOOST_ASSERT( h < m_Shape[2] || (m_Shape[2] == 0 && h == 0) ); + BOOST_ASSERT( w < m_Shape[3] || (m_Shape[3] == 0 && w == 0) ); + + return m_Data[b * m_Shape[1] * m_Shape[2] * m_Shape[3] + + c * m_Shape[2] * m_Shape[3] + + h * m_Shape[3] + + w]; + } + +private: + const TensorShape m_Shape; + DataType* m_Data; +}; + +} //namespace armnn -- cgit v1.2.1
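For reference, the Splitter template ending above walks every element of the input, recovers its per-dimension indices, and copies the element into whichever output view's [origin, origin + shape) window contains it. A simplified sketch of that logic, using a hypothetical ViewSketch type in place of the SplitterQueueDescriptor views:

#include <cstddef>
#include <vector>

struct ViewSketch
{
    std::vector<std::size_t> origin; // where the view starts inside the input tensor
    std::vector<std::size_t> shape;  // extent of the view along each dimension
    float* data;                     // destination buffer, row-major over shape
};

void SplitterSketch(const float* input, const std::vector<std::size_t>& inputShape,
                    std::vector<ViewSketch>& views)
{
    const std::size_t rank = inputShape.size();

    std::size_t total = 1;
    for (std::size_t d : inputShape) { total *= d; }

    std::vector<std::size_t> indices(rank, 0);
    for (std::size_t flat = 0; flat < total; ++flat)
    {
        // Decompose the flat index into row-major per-dimension indices.
        std::size_t rem = flat;
        for (std::size_t d = rank; d-- > 0;)
        {
            indices[d] = rem % inputShape[d];
            rem /= inputShape[d];
        }

        for (ViewSketch& view : views)
        {
            bool inside = true;
            std::size_t outFlat = 0;
            for (std::size_t d = 0; d < rank; ++d)
            {
                if (indices[d] < view.origin[d] || indices[d] >= view.origin[d] + view.shape[d])
                {
                    inside = false;
                    break;
                }
                outFlat = outFlat * view.shape[d] + (indices[d] - view.origin[d]);
            }
            if (inside)
            {
                view.data[outFlat] = input[flat];
            }
        }
    }
}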