From 6ff3b19ee6120edf015fad8caab2991faa3070af Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Mon, 4 Sep 2017 18:44:23 +0100 Subject: COMPMID-344 Updated doxygen Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae --- src/runtime/CL/CLDistribution1D.cpp | 61 ++++ src/runtime/CL/CLHOG.cpp | 84 +++++ src/runtime/CL/CLLut.cpp | 99 ++++++ src/runtime/CL/CLLutAllocator.cpp | 77 +++++ src/runtime/CL/CLMultiHOG.cpp | 52 ++++ src/runtime/CL/CLMultiImage.cpp | 168 ++++++++++ src/runtime/CL/CLPyramid.cpp | 130 ++++++++ src/runtime/CL/CLScheduler.cpp | 49 +++ src/runtime/CL/CLSubTensor.cpp | 81 +++++ src/runtime/CL/CLTensor.cpp | 73 +++++ src/runtime/CL/CLTensorAllocator.cpp | 87 ++++++ src/runtime/CL/ICLSimpleFunction.cpp | 42 +++ src/runtime/CL/functions/CLAbsoluteDifference.cpp | 38 +++ src/runtime/CL/functions/CLAccumulate.cpp | 52 ++++ src/runtime/CL/functions/CLActivationLayer.cpp | 36 +++ src/runtime/CL/functions/CLArithmeticAddition.cpp | 38 +++ .../CL/functions/CLArithmeticSubtraction.cpp | 38 +++ .../CL/functions/CLBatchNormalizationLayer.cpp | 48 +++ src/runtime/CL/functions/CLBitwiseAnd.cpp | 38 +++ src/runtime/CL/functions/CLBitwiseNot.cpp | 38 +++ src/runtime/CL/functions/CLBitwiseOr.cpp | 38 +++ src/runtime/CL/functions/CLBitwiseXor.cpp | 38 +++ src/runtime/CL/functions/CLBox3x3.cpp | 40 +++ src/runtime/CL/functions/CLCannyEdge.cpp | 155 ++++++++++ src/runtime/CL/functions/CLChannelCombine.cpp | 45 +++ src/runtime/CL/functions/CLChannelExtract.cpp | 45 +++ src/runtime/CL/functions/CLColorConvert.cpp | 59 ++++ src/runtime/CL/functions/CLConvolution.cpp | 114 +++++++ src/runtime/CL/functions/CLConvolutionLayer.cpp | 247 +++++++++++++++ src/runtime/CL/functions/CLDepthConcatenate.cpp | 71 +++++ src/runtime/CL/functions/CLDepthConvert.cpp | 38 +++ src/runtime/CL/functions/CLDerivative.cpp | 40 +++ src/runtime/CL/functions/CLDilate.cpp | 40 +++ src/runtime/CL/functions/CLEqualizeHistogram.cpp | 110 +++++++ src/runtime/CL/functions/CLErode.cpp | 40 +++ src/runtime/CL/functions/CLFastCorners.cpp | 127 ++++++++ src/runtime/CL/functions/CLFillBorder.cpp | 38 +++ src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 343 ++++++++++++++++++++ src/runtime/CL/functions/CLGEMM.cpp | 145 +++++++++ src/runtime/CL/functions/CLGEMMInterleave4x4.cpp | 36 +++ src/runtime/CL/functions/CLGEMMLowp.cpp | 85 +++++ src/runtime/CL/functions/CLGaussian3x3.cpp | 40 +++ src/runtime/CL/functions/CLGaussian5x5.cpp | 62 ++++ src/runtime/CL/functions/CLGaussianPyramid.cpp | 183 +++++++++++ src/runtime/CL/functions/CLHOGDescriptor.cpp | 99 ++++++ src/runtime/CL/functions/CLHOGDetector.cpp | 69 +++++ src/runtime/CL/functions/CLHOGGradient.cpp | 75 +++++ src/runtime/CL/functions/CLHOGMultiDetection.cpp | 240 ++++++++++++++ src/runtime/CL/functions/CLHarrisCorners.cpp | 157 ++++++++++ src/runtime/CL/functions/CLHistogram.cpp | 45 +++ src/runtime/CL/functions/CLIntegralImage.cpp | 46 +++ src/runtime/CL/functions/CLLaplacianPyramid.cpp | 99 ++++++ .../CL/functions/CLLaplacianReconstruct.cpp | 99 ++++++ .../CL/functions/CLLocallyConnectedLayer.cpp | 131 ++++++++ src/runtime/CL/functions/CLMagnitude.cpp | 38 +++ src/runtime/CL/functions/CLMeanStdDev.cpp | 53 ++++ src/runtime/CL/functions/CLMedian3x3.cpp | 40 +++ src/runtime/CL/functions/CLMinMaxLocation.cpp | 98 ++++++ src/runtime/CL/functions/CLNonLinearFilter.cpp | 40 +++ .../CL/functions/CLNonMaximaSuppression3x3.cpp | 47 +++ src/runtime/CL/functions/CLNormalizationLayer.cpp | 60 ++++ src/runtime/CL/functions/CLOpticalFlow.cpp | 150 +++++++++ 
src/runtime/CL/functions/CLPhase.cpp | 38 +++ .../CL/functions/CLPixelWiseMultiplication.cpp | 39 +++ src/runtime/CL/functions/CLPoolingLayer.cpp | 41 +++ src/runtime/CL/functions/CLRemap.cpp | 50 +++ src/runtime/CL/functions/CLScale.cpp | 45 +++ src/runtime/CL/functions/CLScharr3x3.cpp | 40 +++ src/runtime/CL/functions/CLSobel3x3.cpp | 40 +++ src/runtime/CL/functions/CLSobel5x5.cpp | 81 +++++ src/runtime/CL/functions/CLSobel7x7.cpp | 81 +++++ src/runtime/CL/functions/CLSoftmaxLayer.cpp | 67 ++++ src/runtime/CL/functions/CLTableLookup.cpp | 38 +++ src/runtime/CL/functions/CLThreshold.cpp | 38 +++ src/runtime/CL/functions/CLTranspose.cpp | 38 +++ src/runtime/CL/functions/CLWarpAffine.cpp | 40 +++ src/runtime/CL/functions/CLWarpPerspective.cpp | 40 +++ src/runtime/CPP/CPPScheduler.cpp | 225 ++++++++++++++ src/runtime/CPP/SingleThreadScheduler.cpp | 52 ++++ src/runtime/Distribution1D.cpp | 42 +++ src/runtime/HOG.cpp | 51 +++ src/runtime/ILutAllocator.cpp | 58 ++++ src/runtime/ITensorAllocator.cpp | 51 +++ src/runtime/Lut.cpp | 75 +++++ src/runtime/LutAllocator.cpp | 52 ++++ src/runtime/MultiHOG.cpp | 52 ++++ src/runtime/MultiImage.cpp | 220 +++++++++++++ src/runtime/NEON/INESimpleFunction.cpp | 39 +++ .../NEON/functions/NEAbsoluteDifference.cpp | 38 +++ src/runtime/NEON/functions/NEAccumulate.cpp | 61 ++++ src/runtime/NEON/functions/NEActivationLayer.cpp | 36 +++ .../NEON/functions/NEArithmeticAddition.cpp | 38 +++ .../NEON/functions/NEArithmeticSubtraction.cpp | 38 +++ .../NEON/functions/NEBatchNormalizationLayer.cpp | 49 +++ src/runtime/NEON/functions/NEBitwiseAnd.cpp | 38 +++ src/runtime/NEON/functions/NEBitwiseNot.cpp | 38 +++ src/runtime/NEON/functions/NEBitwiseOr.cpp | 38 +++ src/runtime/NEON/functions/NEBitwiseXor.cpp | 38 +++ src/runtime/NEON/functions/NEBox3x3.cpp | 49 +++ src/runtime/NEON/functions/NECannyEdge.cpp | 169 ++++++++++ src/runtime/NEON/functions/NEChannelCombine.cpp | 45 +++ src/runtime/NEON/functions/NEChannelExtract.cpp | 45 +++ src/runtime/NEON/functions/NEColorConvert.cpp | 59 ++++ src/runtime/NEON/functions/NEConvolution.cpp | 120 +++++++ src/runtime/NEON/functions/NEConvolutionLayer.cpp | 246 +++++++++++++++ src/runtime/NEON/functions/NEDepthConcatenate.cpp | 67 ++++ src/runtime/NEON/functions/NEDepthConvert.cpp | 44 +++ src/runtime/NEON/functions/NEDerivative.cpp | 52 ++++ src/runtime/NEON/functions/NEDilate.cpp | 40 +++ .../NEON/functions/NEDirectConvolutionLayer.cpp | 75 +++++ src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 62 ++++ src/runtime/NEON/functions/NEErode.cpp | 40 +++ src/runtime/NEON/functions/NEFastCorners.cpp | 101 ++++++ src/runtime/NEON/functions/NEFillBorder.cpp | 39 +++ .../NEON/functions/NEFullyConnectedLayer.cpp | 344 +++++++++++++++++++++ src/runtime/NEON/functions/NEGEMM.cpp | 156 ++++++++++ src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp | 36 +++ src/runtime/NEON/functions/NEGEMMLowp.cpp | 84 +++++ src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 40 +++ src/runtime/NEON/functions/NEGaussian3x3.cpp | 40 +++ src/runtime/NEON/functions/NEGaussian5x5.cpp | 60 ++++ src/runtime/NEON/functions/NEGaussianPyramid.cpp | 183 +++++++++++ src/runtime/NEON/functions/NEHOGDescriptor.cpp | 99 ++++++ src/runtime/NEON/functions/NEHOGDetector.cpp | 36 +++ src/runtime/NEON/functions/NEHOGGradient.cpp | 80 +++++ src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 231 ++++++++++++++ src/runtime/NEON/functions/NEHarrisCorners.cpp | 212 +++++++++++++ src/runtime/NEON/functions/NEHistogram.cpp | 58 ++++ src/runtime/NEON/functions/NEIntegralImage.cpp | 
40 +++ src/runtime/NEON/functions/NELaplacianPyramid.cpp | 102 ++++++ .../NEON/functions/NELaplacianReconstruct.cpp | 100 ++++++ .../NEON/functions/NELocallyConnectedLayer.cpp | 131 ++++++++ src/runtime/NEON/functions/NEMagnitude.cpp | 48 +++ src/runtime/NEON/functions/NEMeanStdDev.cpp | 47 +++ src/runtime/NEON/functions/NEMedian3x3.cpp | 40 +++ src/runtime/NEON/functions/NEMinMaxLocation.cpp | 50 +++ src/runtime/NEON/functions/NENonLinearFilter.cpp | 42 +++ .../NEON/functions/NENonMaximaSuppression3x3.cpp | 47 +++ .../NEON/functions/NENormalizationLayer.cpp | 61 ++++ src/runtime/NEON/functions/NEOpticalFlow.cpp | 119 +++++++ src/runtime/NEON/functions/NEPhase.cpp | 38 +++ .../NEON/functions/NEPixelWiseMultiplication.cpp | 38 +++ src/runtime/NEON/functions/NEPoolingLayer.cpp | 41 +++ src/runtime/NEON/functions/NERemap.cpp | 53 ++++ src/runtime/NEON/functions/NEScale.cpp | 171 ++++++++++ src/runtime/NEON/functions/NEScharr3x3.cpp | 40 +++ src/runtime/NEON/functions/NESobel3x3.cpp | 40 +++ src/runtime/NEON/functions/NESobel5x5.cpp | 81 +++++ src/runtime/NEON/functions/NESobel7x7.cpp | 81 +++++ src/runtime/NEON/functions/NESoftmaxLayer.cpp | 72 +++++ src/runtime/NEON/functions/NETableLookup.cpp | 38 +++ src/runtime/NEON/functions/NEThreshold.cpp | 38 +++ src/runtime/NEON/functions/NETranspose.cpp | 38 +++ src/runtime/NEON/functions/NEWarpAffine.cpp | 62 ++++ src/runtime/NEON/functions/NEWarpPerspective.cpp | 62 ++++ src/runtime/OMP/OMPScheduler.cpp | 83 +++++ src/runtime/Pyramid.cpp | 120 +++++++ src/runtime/Scheduler.cpp | 149 +++++++++ src/runtime/SubTensor.cpp | 57 ++++ src/runtime/Tensor.cpp | 51 +++ src/runtime/TensorAllocator.cpp | 119 +++++++ src/runtime/Utils.cpp | 42 +++ 162 files changed, 12392 insertions(+) create mode 100644 src/runtime/CL/CLDistribution1D.cpp create mode 100644 src/runtime/CL/CLHOG.cpp create mode 100644 src/runtime/CL/CLLut.cpp create mode 100644 src/runtime/CL/CLLutAllocator.cpp create mode 100644 src/runtime/CL/CLMultiHOG.cpp create mode 100644 src/runtime/CL/CLMultiImage.cpp create mode 100644 src/runtime/CL/CLPyramid.cpp create mode 100644 src/runtime/CL/CLScheduler.cpp create mode 100644 src/runtime/CL/CLSubTensor.cpp create mode 100644 src/runtime/CL/CLTensor.cpp create mode 100644 src/runtime/CL/CLTensorAllocator.cpp create mode 100644 src/runtime/CL/ICLSimpleFunction.cpp create mode 100644 src/runtime/CL/functions/CLAbsoluteDifference.cpp create mode 100644 src/runtime/CL/functions/CLAccumulate.cpp create mode 100644 src/runtime/CL/functions/CLActivationLayer.cpp create mode 100644 src/runtime/CL/functions/CLArithmeticAddition.cpp create mode 100644 src/runtime/CL/functions/CLArithmeticSubtraction.cpp create mode 100644 src/runtime/CL/functions/CLBatchNormalizationLayer.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseAnd.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseNot.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseOr.cpp create mode 100644 src/runtime/CL/functions/CLBitwiseXor.cpp create mode 100644 src/runtime/CL/functions/CLBox3x3.cpp create mode 100644 src/runtime/CL/functions/CLCannyEdge.cpp create mode 100644 src/runtime/CL/functions/CLChannelCombine.cpp create mode 100644 src/runtime/CL/functions/CLChannelExtract.cpp create mode 100644 src/runtime/CL/functions/CLColorConvert.cpp create mode 100644 src/runtime/CL/functions/CLConvolution.cpp create mode 100644 src/runtime/CL/functions/CLConvolutionLayer.cpp create mode 100644 src/runtime/CL/functions/CLDepthConcatenate.cpp create mode 100644 
src/runtime/CL/functions/CLDepthConvert.cpp create mode 100644 src/runtime/CL/functions/CLDerivative.cpp create mode 100644 src/runtime/CL/functions/CLDilate.cpp create mode 100644 src/runtime/CL/functions/CLEqualizeHistogram.cpp create mode 100644 src/runtime/CL/functions/CLErode.cpp create mode 100644 src/runtime/CL/functions/CLFastCorners.cpp create mode 100644 src/runtime/CL/functions/CLFillBorder.cpp create mode 100644 src/runtime/CL/functions/CLFullyConnectedLayer.cpp create mode 100644 src/runtime/CL/functions/CLGEMM.cpp create mode 100644 src/runtime/CL/functions/CLGEMMInterleave4x4.cpp create mode 100644 src/runtime/CL/functions/CLGEMMLowp.cpp create mode 100644 src/runtime/CL/functions/CLGaussian3x3.cpp create mode 100644 src/runtime/CL/functions/CLGaussian5x5.cpp create mode 100644 src/runtime/CL/functions/CLGaussianPyramid.cpp create mode 100644 src/runtime/CL/functions/CLHOGDescriptor.cpp create mode 100644 src/runtime/CL/functions/CLHOGDetector.cpp create mode 100644 src/runtime/CL/functions/CLHOGGradient.cpp create mode 100644 src/runtime/CL/functions/CLHOGMultiDetection.cpp create mode 100644 src/runtime/CL/functions/CLHarrisCorners.cpp create mode 100644 src/runtime/CL/functions/CLHistogram.cpp create mode 100644 src/runtime/CL/functions/CLIntegralImage.cpp create mode 100644 src/runtime/CL/functions/CLLaplacianPyramid.cpp create mode 100644 src/runtime/CL/functions/CLLaplacianReconstruct.cpp create mode 100644 src/runtime/CL/functions/CLLocallyConnectedLayer.cpp create mode 100644 src/runtime/CL/functions/CLMagnitude.cpp create mode 100644 src/runtime/CL/functions/CLMeanStdDev.cpp create mode 100644 src/runtime/CL/functions/CLMedian3x3.cpp create mode 100644 src/runtime/CL/functions/CLMinMaxLocation.cpp create mode 100644 src/runtime/CL/functions/CLNonLinearFilter.cpp create mode 100644 src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp create mode 100644 src/runtime/CL/functions/CLNormalizationLayer.cpp create mode 100644 src/runtime/CL/functions/CLOpticalFlow.cpp create mode 100644 src/runtime/CL/functions/CLPhase.cpp create mode 100644 src/runtime/CL/functions/CLPixelWiseMultiplication.cpp create mode 100644 src/runtime/CL/functions/CLPoolingLayer.cpp create mode 100644 src/runtime/CL/functions/CLRemap.cpp create mode 100644 src/runtime/CL/functions/CLScale.cpp create mode 100644 src/runtime/CL/functions/CLScharr3x3.cpp create mode 100644 src/runtime/CL/functions/CLSobel3x3.cpp create mode 100644 src/runtime/CL/functions/CLSobel5x5.cpp create mode 100644 src/runtime/CL/functions/CLSobel7x7.cpp create mode 100644 src/runtime/CL/functions/CLSoftmaxLayer.cpp create mode 100644 src/runtime/CL/functions/CLTableLookup.cpp create mode 100644 src/runtime/CL/functions/CLThreshold.cpp create mode 100644 src/runtime/CL/functions/CLTranspose.cpp create mode 100644 src/runtime/CL/functions/CLWarpAffine.cpp create mode 100644 src/runtime/CL/functions/CLWarpPerspective.cpp create mode 100644 src/runtime/CPP/CPPScheduler.cpp create mode 100644 src/runtime/CPP/SingleThreadScheduler.cpp create mode 100644 src/runtime/Distribution1D.cpp create mode 100644 src/runtime/HOG.cpp create mode 100644 src/runtime/ILutAllocator.cpp create mode 100644 src/runtime/ITensorAllocator.cpp create mode 100644 src/runtime/Lut.cpp create mode 100644 src/runtime/LutAllocator.cpp create mode 100644 src/runtime/MultiHOG.cpp create mode 100644 src/runtime/MultiImage.cpp create mode 100644 src/runtime/NEON/INESimpleFunction.cpp create mode 100644 src/runtime/NEON/functions/NEAbsoluteDifference.cpp create 
mode 100644 src/runtime/NEON/functions/NEAccumulate.cpp create mode 100644 src/runtime/NEON/functions/NEActivationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEArithmeticAddition.cpp create mode 100644 src/runtime/NEON/functions/NEArithmeticSubtraction.cpp create mode 100644 src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseAnd.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseNot.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseOr.cpp create mode 100644 src/runtime/NEON/functions/NEBitwiseXor.cpp create mode 100644 src/runtime/NEON/functions/NEBox3x3.cpp create mode 100644 src/runtime/NEON/functions/NECannyEdge.cpp create mode 100644 src/runtime/NEON/functions/NEChannelCombine.cpp create mode 100644 src/runtime/NEON/functions/NEChannelExtract.cpp create mode 100644 src/runtime/NEON/functions/NEColorConvert.cpp create mode 100644 src/runtime/NEON/functions/NEConvolution.cpp create mode 100644 src/runtime/NEON/functions/NEConvolutionLayer.cpp create mode 100644 src/runtime/NEON/functions/NEDepthConcatenate.cpp create mode 100644 src/runtime/NEON/functions/NEDepthConvert.cpp create mode 100644 src/runtime/NEON/functions/NEDerivative.cpp create mode 100644 src/runtime/NEON/functions/NEDilate.cpp create mode 100644 src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp create mode 100644 src/runtime/NEON/functions/NEEqualizeHistogram.cpp create mode 100644 src/runtime/NEON/functions/NEErode.cpp create mode 100644 src/runtime/NEON/functions/NEFastCorners.cpp create mode 100644 src/runtime/NEON/functions/NEFillBorder.cpp create mode 100644 src/runtime/NEON/functions/NEFullyConnectedLayer.cpp create mode 100644 src/runtime/NEON/functions/NEGEMM.cpp create mode 100644 src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp create mode 100644 src/runtime/NEON/functions/NEGEMMLowp.cpp create mode 100644 src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp create mode 100644 src/runtime/NEON/functions/NEGaussian3x3.cpp create mode 100644 src/runtime/NEON/functions/NEGaussian5x5.cpp create mode 100644 src/runtime/NEON/functions/NEGaussianPyramid.cpp create mode 100644 src/runtime/NEON/functions/NEHOGDescriptor.cpp create mode 100644 src/runtime/NEON/functions/NEHOGDetector.cpp create mode 100644 src/runtime/NEON/functions/NEHOGGradient.cpp create mode 100644 src/runtime/NEON/functions/NEHOGMultiDetection.cpp create mode 100644 src/runtime/NEON/functions/NEHarrisCorners.cpp create mode 100644 src/runtime/NEON/functions/NEHistogram.cpp create mode 100644 src/runtime/NEON/functions/NEIntegralImage.cpp create mode 100644 src/runtime/NEON/functions/NELaplacianPyramid.cpp create mode 100644 src/runtime/NEON/functions/NELaplacianReconstruct.cpp create mode 100644 src/runtime/NEON/functions/NELocallyConnectedLayer.cpp create mode 100644 src/runtime/NEON/functions/NEMagnitude.cpp create mode 100644 src/runtime/NEON/functions/NEMeanStdDev.cpp create mode 100644 src/runtime/NEON/functions/NEMedian3x3.cpp create mode 100644 src/runtime/NEON/functions/NEMinMaxLocation.cpp create mode 100644 src/runtime/NEON/functions/NENonLinearFilter.cpp create mode 100644 src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp create mode 100644 src/runtime/NEON/functions/NENormalizationLayer.cpp create mode 100644 src/runtime/NEON/functions/NEOpticalFlow.cpp create mode 100644 src/runtime/NEON/functions/NEPhase.cpp create mode 100644 src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp create mode 100644 
src/runtime/NEON/functions/NEPoolingLayer.cpp
 create mode 100644 src/runtime/NEON/functions/NERemap.cpp
 create mode 100644 src/runtime/NEON/functions/NEScale.cpp
 create mode 100644 src/runtime/NEON/functions/NEScharr3x3.cpp
 create mode 100644 src/runtime/NEON/functions/NESobel3x3.cpp
 create mode 100644 src/runtime/NEON/functions/NESobel5x5.cpp
 create mode 100644 src/runtime/NEON/functions/NESobel7x7.cpp
 create mode 100644 src/runtime/NEON/functions/NESoftmaxLayer.cpp
 create mode 100644 src/runtime/NEON/functions/NETableLookup.cpp
 create mode 100644 src/runtime/NEON/functions/NEThreshold.cpp
 create mode 100644 src/runtime/NEON/functions/NETranspose.cpp
 create mode 100644 src/runtime/NEON/functions/NEWarpAffine.cpp
 create mode 100644 src/runtime/NEON/functions/NEWarpPerspective.cpp
 create mode 100644 src/runtime/OMP/OMPScheduler.cpp
 create mode 100644 src/runtime/Pyramid.cpp
 create mode 100644 src/runtime/Scheduler.cpp
 create mode 100644 src/runtime/SubTensor.cpp
 create mode 100644 src/runtime/Tensor.cpp
 create mode 100644 src/runtime/TensorAllocator.cpp
 create mode 100644 src/runtime/Utils.cpp
(limited to 'src/runtime')

diff --git a/src/runtime/CL/CLDistribution1D.cpp b/src/runtime/CL/CLDistribution1D.cpp
new file mode 100644
index 0000000000..f1dd95e77e
--- /dev/null
+++ b/src/runtime/CL/CLDistribution1D.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLDistribution1D.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLDistribution1D::CLDistribution1D(size_t num_bins, int32_t offset, uint32_t range)
+    : ICLDistribution1D(num_bins, offset, range), _mem(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, num_bins * sizeof(int32_t))
+{
+}
+
+void CLDistribution1D::map(bool blocking)
+{
+    ICLDistribution1D::map(CLScheduler::get().queue(), blocking);
+}
+
+void CLDistribution1D::unmap()
+{
+    ICLDistribution1D::unmap(CLScheduler::get().queue());
+}
+
+uint32_t *CLDistribution1D::do_map(cl::CommandQueue &q, bool blocking)
+{
+    ARM_COMPUTE_ERROR_ON(_mem.get() == nullptr);
+    return static_cast<uint32_t *>(q.enqueueMapBuffer(_mem, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, size()));
+}
+
+void CLDistribution1D::do_unmap(cl::CommandQueue &q)
+{
+    ARM_COMPUTE_ERROR_ON(_mem.get() == nullptr);
+    q.enqueueUnmapMemObject(_mem, _mapping);
+}
+
+cl::Buffer &CLDistribution1D::cl_buffer()
+{
+    return _mem;
+}
diff --git a/src/runtime/CL/CLHOG.cpp b/src/runtime/CL/CLHOG.cpp
new file mode 100644
index 0000000000..3f5266ce70
--- /dev/null
+++ b/src/runtime/CL/CLHOG.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLHOG.h"
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLHOG::CLHOG()
+    : _info(), _buffer()
+{
+}
+
+void CLHOG::init(const HOGInfo &input)
+{
+    ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr);
+    _info   = input;
+    _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info()->descriptor_size() * sizeof(float));
+}
+
+void CLHOG::free()
+{
+    ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
+
+    _buffer = cl::Buffer();
+}
+
+const HOGInfo *CLHOG::info() const
+{
+    return &_info;
+}
+
+const cl::Buffer &CLHOG::cl_buffer() const
+{
+    return _buffer;
+}
+
+void CLHOG::map(bool blocking)
+{
+    ARM_COMPUTE_ERROR_ON(descriptor() != nullptr);
+    ICLHOG::map(CLScheduler::get().queue(), blocking);
+}
+
+void CLHOG::unmap()
+{
+    ARM_COMPUTE_ERROR_ON(descriptor() == nullptr);
+    ICLHOG::unmap(CLScheduler::get().queue());
+}
+
+uint8_t *CLHOG::do_map(cl::CommandQueue &q, bool blocking)
+{
+    ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
+    return static_cast<uint8_t *>(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, info()->descriptor_size()));
+}
+
+void CLHOG::do_unmap(cl::CommandQueue &q)
+{
+    ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
+    q.enqueueUnmapMemObject(_buffer, descriptor());
+}
\ No newline at end of file
diff --git a/src/runtime/CL/CLLut.cpp b/src/runtime/CL/CLLut.cpp
new file mode 100644
index 0000000000..a8cbf2131f
--- /dev/null
+++ b/src/runtime/CL/CLLut.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLLut.h"
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <cstring>
+
+using namespace arm_compute;
+
+CLLut::CLLut()
+    : _allocator()
+{
+}
+
+CLLut::CLLut(size_t num_elements, DataType data_type)
+    : _allocator()
+{
+    _allocator.init(num_elements, data_type);
+}
+
+size_t CLLut::num_elements() const
+{
+    return _allocator.num_elements();
+}
+
+uint32_t CLLut::index_offset() const
+{
+    return (DataType::S16 == _allocator.type()) ? num_elements() / 2 : 0;
+}
+
+size_t CLLut::size_in_bytes() const
+{
+    return _allocator.size();
+}
+
+DataType CLLut::type() const
+{
+    return _allocator.type();
+}
+
+const cl::Buffer &CLLut::cl_buffer() const
+{
+    return _allocator.cl_data();
+}
+
+void CLLut::clear()
+{
+    cl::CommandQueue &q = CLScheduler::get().queue();
+    uint8_t *data = _allocator.map(q, true /* blocking */);
+    std::memset(data, 0, size_in_bytes());
+    _allocator.unmap(q, data);
+}
+
+ILutAllocator *CLLut::allocator()
+{
+    return &_allocator;
+}
+
+void CLLut::map(bool blocking)
+{
+    ICLLut::map(CLScheduler::get().queue(), blocking);
+}
+
+void CLLut::unmap()
+{
+    ICLLut::unmap(CLScheduler::get().queue());
+}
+
+uint8_t *CLLut::do_map(cl::CommandQueue &q, bool blocking)
+{
+    return _allocator.map(q, blocking);
+}
+
+void CLLut::do_unmap(cl::CommandQueue &q)
+{
+    _allocator.unmap(q, buffer());
+}
diff --git a/src/runtime/CL/CLLutAllocator.cpp b/src/runtime/CL/CLLutAllocator.cpp
new file mode 100644
index 0000000000..311de4bb8d
--- /dev/null
+++ b/src/runtime/CL/CLLutAllocator.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLLutAllocator.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLLutAllocator::CLLutAllocator()
+    : _buffer(), _mapping(nullptr)
+{
+}
+
+uint8_t *CLLutAllocator::data()
+{
+    return _mapping;
+}
+
+const cl::Buffer &CLLutAllocator::cl_data() const
+{
+    return _buffer;
+}
+
+uint8_t *CLLutAllocator::map(cl::CommandQueue &q, bool blocking)
+{
+    ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
+    return static_cast<uint8_t *>(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, size()));
+}
+
+void CLLutAllocator::unmap(cl::CommandQueue &q, uint8_t *mapping)
+{
+    ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
+    q.enqueueUnmapMemObject(_buffer, mapping);
+}
+
+void CLLutAllocator::allocate()
+{
+    _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, size());
+}
+
+uint8_t *CLLutAllocator::lock()
+{
+    ARM_COMPUTE_ERROR_ON(_mapping != nullptr);
+    cl::CommandQueue q = CLScheduler::get().queue();
+    _mapping = map(q, true);
+    return _mapping;
+}
+
+void CLLutAllocator::unlock()
+{
+    ARM_COMPUTE_ERROR_ON(_mapping == nullptr);
+    cl::CommandQueue q = CLScheduler::get().queue();
+    unmap(q, _mapping);
+    _mapping = nullptr;
+}
diff --git a/src/runtime/CL/CLMultiHOG.cpp b/src/runtime/CL/CLMultiHOG.cpp
new file mode 100644
index 0000000000..b9e8739454
--- /dev/null
+++ b/src/runtime/CL/CLMultiHOG.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLMultiHOG.h"
+
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+
+using namespace arm_compute;
+
+CLMultiHOG::CLMultiHOG(size_t num_models)
+    : _num_models(num_models), _model(arm_compute::cpp14::make_unique<CLHOG[]>(_num_models))
+{
+}
+
+size_t CLMultiHOG::num_models() const
+{
+    return _num_models;
+}
+
+ICLHOG *CLMultiHOG::cl_model(size_t index)
+{
+    ARM_COMPUTE_ERROR_ON(index >= _num_models);
+    return (_model.get() + index);
+}
+
+const ICLHOG *CLMultiHOG::cl_model(size_t index) const
+{
+    ARM_COMPUTE_ERROR_ON(index >= _num_models);
+    return (_model.get() + index);
+}
\ No newline at end of file
diff --git a/src/runtime/CL/CLMultiImage.cpp b/src/runtime/CL/CLMultiImage.cpp
new file mode 100644
index 0000000000..63059cb5f4
--- /dev/null
+++ b/src/runtime/CL/CLMultiImage.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "arm_compute/runtime/CL/CLMultiImage.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +using namespace arm_compute; + +CLMultiImage::CLMultiImage() + : _info(), _plane() +{ +} + +const MultiImageInfo *CLMultiImage::info() const +{ + return &_info; +} + +void CLMultiImage::init(unsigned int width, unsigned int height, Format format) +{ + internal_init(width, height, format, false); +} + +void CLMultiImage::init_auto_padding(unsigned int width, unsigned int height, Format format) +{ + internal_init(width, height, format, true); +} + +void CLMultiImage::internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding) +{ + TensorInfo info(width, height, Format::U8); + + if(auto_padding) + { + info.auto_padding(); + } + + switch(format) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::F16: + case Format::F32: + case Format::U32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + { + TensorInfo info_full(width, height, format); + + if(auto_padding) + { + info_full.auto_padding(); + } + + std::get<0>(_plane).allocator()->init(info_full); + break; + } + case Format::NV12: + case Format::NV21: + { + TensorInfo info_uv88(width / 2, height / 2, Format::UV88); + + if(auto_padding) + { + info_uv88.auto_padding(); + } + + std::get<0>(_plane).allocator()->init(info); + std::get<1>(_plane).allocator()->init(info_uv88); + break; + } + case Format::IYUV: + { + TensorInfo info_sub2(width / 2, height / 2, Format::U8); + + if(auto_padding) + { + info_sub2.auto_padding(); + } + + std::get<0>(_plane).allocator()->init(info); + std::get<1>(_plane).allocator()->init(info_sub2); + std::get<2>(_plane).allocator()->init(info_sub2); + break; + } + case Format::YUV444: + std::get<0>(_plane).allocator()->init(info); + std::get<1>(_plane).allocator()->init(info); + std::get<2>(_plane).allocator()->init(info); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } + + _info.init(width, height, format); +} + +void CLMultiImage::allocate() +{ + switch(_info.format()) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::F16: + case Format::F32: + case Format::U32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + std::get<0>(_plane).allocator()->allocate(); + break; + case Format::NV12: + case Format::NV21: + std::get<0>(_plane).allocator()->allocate(); + std::get<1>(_plane).allocator()->allocate(); + break; + case Format::IYUV: + case Format::YUV444: + std::get<0>(_plane).allocator()->allocate(); + std::get<1>(_plane).allocator()->allocate(); + std::get<2>(_plane).allocator()->allocate(); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } +} + +CLImage *CLMultiImage::cl_plane(unsigned int index) +{ + return &_plane[index]; +} + +const CLImage *CLMultiImage::cl_plane(unsigned int index) const +{ + return &_plane[index]; +} diff --git a/src/runtime/CL/CLPyramid.cpp b/src/runtime/CL/CLPyramid.cpp new file mode 100644 index 0000000000..41d81ea0f8 --- /dev/null +++ b/src/runtime/CL/CLPyramid.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLPyramid.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/PyramidInfo.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+
+#include <cmath>
+#include <cstddef>
+
+using namespace arm_compute;
+
+CLPyramid::CLPyramid()
+    : _info(), _pyramid(nullptr)
+{
+}
+
+void CLPyramid::init(const PyramidInfo &info)
+{
+    internal_init(info, false);
+}
+
+void CLPyramid::init_auto_padding(const PyramidInfo &info)
+{
+    internal_init(info, true);
+}
+
+void CLPyramid::internal_init(const PyramidInfo &info, bool auto_padding)
+{
+    _info = info;
+    _pyramid = arm_compute::cpp14::make_unique<CLTensor[]>(_info.num_levels());
+
+    size_t w = _info.width();
+    size_t h = _info.height();
+    size_t ref_w = w;
+    size_t ref_h = h;
+    const bool is_orb_scale = (SCALE_PYRAMID_ORB == _info.scale());
+    TensorShape tensor_shape = _info.tensor_shape();
+
+    // Note: Look-up table used by the OpenVX sample implementation
+    const std::array<float, 4> c_orbscale =
+    {
+        {
+            0.5f,
+            SCALE_PYRAMID_ORB,
+            SCALE_PYRAMID_ORB * SCALE_PYRAMID_ORB,
+            SCALE_PYRAMID_ORB *SCALE_PYRAMID_ORB * SCALE_PYRAMID_ORB
+        }
+    };
+
+    for(size_t i = 0; i < _info.num_levels(); ++i)
+    {
+        TensorInfo tensor_info(tensor_shape, _info.format());
+
+        if(auto_padding)
+        {
+            tensor_info.auto_padding();
+        }
+
+        _pyramid[i].allocator()->init(tensor_info);
+
+        if(is_orb_scale)
+        {
+            const float orb_scale = c_orbscale[(i + 1) % 4];
+            w = std::ceil(ref_w * orb_scale);
+            h = std::ceil(ref_h * orb_scale);
+
+            if(0 == ((i + 1) % 4))
+            {
+                ref_w = w;
+                ref_h = h;
+            }
+        }
+        else
+        {
+            w = (w + 1) * _info.scale();
+            h = (h + 1) * _info.scale();
+        }
+
+        // Update tensor_shape
+        tensor_shape.set(0, w);
+        tensor_shape.set(1, h);
+    }
+}
+
+void CLPyramid::allocate()
+{
+    ARM_COMPUTE_ERROR_ON(_pyramid == nullptr);
+
+    for(size_t i = 0; i < _info.num_levels(); ++i)
+    {
+        (_pyramid.get() + i)->allocator()->allocate();
+    }
+}
+
+const PyramidInfo *CLPyramid::info() const
+{
+    return &_info;
+}
+
+CLTensor *CLPyramid::get_pyramid_level(size_t index) const
+{
+    ARM_COMPUTE_ERROR_ON(index >= _info.num_levels());
+
+    return (_pyramid.get() + index);
+}
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
new file mode 100644
index 0000000000..fe25ce534c
--- /dev/null
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM
Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include "arm_compute/core/CL/ICLKernel.h" + +using namespace arm_compute; + +CLScheduler::CLScheduler() + : _context(), _queue(), _target(GPUTarget::MIDGARD) +{ +} + +CLScheduler &CLScheduler::get() +{ + static CLScheduler scheduler; + return scheduler; +} + +void CLScheduler::enqueue(ICLKernel &kernel, bool flush) +{ + kernel.run(kernel.window(), _queue); + + if(flush) + { + _queue.flush(); + } +} diff --git a/src/runtime/CL/CLSubTensor.cpp b/src/runtime/CL/CLSubTensor.cpp new file mode 100644 index 0000000000..b228c0abda --- /dev/null +++ b/src/runtime/CL/CLSubTensor.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/CLSubTensor.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLSubTensor::CLSubTensor(ICLTensor *parent, const TensorShape &tensor_shape, const Coordinates &coords) + : _parent(nullptr), _info() +{ + ARM_COMPUTE_ERROR_ON(parent == nullptr); + _info = SubTensorInfo(parent->info(), tensor_shape, coords); + _parent = parent; +} + +ITensorInfo *CLSubTensor::info() const +{ + return &_info; +} + +ITensorInfo *CLSubTensor::info() +{ + return &_info; +} + +const cl::Buffer &CLSubTensor::cl_buffer() const +{ + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->cl_buffer(); +} + +ICLTensor *CLSubTensor::parent() +{ + return _parent; +} + +void CLSubTensor::map(bool blocking) +{ + ICLTensor::map(CLScheduler::get().queue(), blocking); +} + +void CLSubTensor::unmap() +{ + ICLTensor::unmap(CLScheduler::get().queue()); +} + +uint8_t *CLSubTensor::do_map(cl::CommandQueue &q, bool blocking) +{ + ARM_COMPUTE_ERROR_ON(cl_buffer().get() == nullptr); + return static_cast(q.enqueueMapBuffer(cl_buffer(), blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, info()->total_size())); +} + +void CLSubTensor::do_unmap(cl::CommandQueue &q) +{ + ARM_COMPUTE_ERROR_ON(cl_buffer().get() == nullptr); + q.enqueueUnmapMemObject(cl_buffer(), buffer()); +} diff --git a/src/runtime/CL/CLTensor.cpp b/src/runtime/CL/CLTensor.cpp new file mode 100644 index 0000000000..eefa0331d5 --- /dev/null +++ b/src/runtime/CL/CLTensor.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLTensor::CLTensor() + : _allocator() +{ +} + +TensorInfo *CLTensor::info() const +{ + return &_allocator.info(); +} + +TensorInfo *CLTensor::info() +{ + return &_allocator.info(); +} + +const cl::Buffer &CLTensor::cl_buffer() const +{ + return _allocator.cl_data(); +} + +ITensorAllocator *CLTensor::allocator() +{ + return &_allocator; +} + +void CLTensor::map(bool blocking) +{ + ICLTensor::map(CLScheduler::get().queue(), blocking); +} + +void CLTensor::unmap() +{ + ICLTensor::unmap(CLScheduler::get().queue()); +} + +uint8_t *CLTensor::do_map(cl::CommandQueue &q, bool blocking) +{ + return _allocator.map(q, blocking); +} + +void CLTensor::do_unmap(cl::CommandQueue &q) +{ + _allocator.unmap(q, buffer()); +} diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp new file mode 100644 index 0000000000..8112a7148f --- /dev/null +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/CLTensorAllocator.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLTensorAllocator::CLTensorAllocator() + : _buffer(), _mapping(nullptr) +{ +} + +uint8_t *CLTensorAllocator::data() +{ + return _mapping; +} + +const cl::Buffer &CLTensorAllocator::cl_data() const +{ + return _buffer; +} + +void CLTensorAllocator::allocate() +{ + ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr); + + _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info().total_size()); + info().set_is_resizable(false); +} + +void CLTensorAllocator::free() +{ + ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr); + + _buffer = cl::Buffer(); + info().set_is_resizable(true); +} + +uint8_t *CLTensorAllocator::lock() +{ + ARM_COMPUTE_ERROR_ON(_mapping != nullptr); + _mapping = map(CLScheduler::get().queue(), true); + return _mapping; +} + +void CLTensorAllocator::unlock() +{ + ARM_COMPUTE_ERROR_ON(_mapping == nullptr); + unmap(CLScheduler::get().queue(), _mapping); + _mapping = nullptr; +} + +uint8_t *CLTensorAllocator::map(cl::CommandQueue &q, bool blocking) +{ + ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr); + return static_cast(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, info().total_size())); +} + +void CLTensorAllocator::unmap(cl::CommandQueue &q, uint8_t *mapping) +{ + ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr); + q.enqueueUnmapMemObject(_buffer, mapping); +} diff --git a/src/runtime/CL/ICLSimpleFunction.cpp b/src/runtime/CL/ICLSimpleFunction.cpp new file mode 100644 index 0000000000..aa45743d37 --- /dev/null +++ b/src/runtime/CL/ICLSimpleFunction.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +ICLSimpleFunction::ICLSimpleFunction() + : _kernel(), _border_handler() +{ +} + +void ICLSimpleFunction::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the CL kernel or function isn't configured"); + + CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(*_kernel); +} diff --git a/src/runtime/CL/functions/CLAbsoluteDifference.cpp b/src/runtime/CL/functions/CLAbsoluteDifference.cpp new file mode 100644 index 0000000000..5097dd4710 --- /dev/null +++ b/src/runtime/CL/functions/CLAbsoluteDifference.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h" + +#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLAbsoluteDifference::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLAccumulate.cpp b/src/runtime/CL/functions/CLAccumulate.cpp new file mode 100644 index 0000000000..56c519984c --- /dev/null +++ b/src/runtime/CL/functions/CLAccumulate.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLAccumulate.h" + +#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLAccumulate::configure(const ICLTensor *input, ICLTensor *accum) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, accum); + _kernel = std::move(k); +} + +void CLAccumulateWeighted::configure(const ICLTensor *input, float alpha, ICLTensor *accum) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, alpha, accum); + _kernel = std::move(k); +} + +void CLAccumulateSquared::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, shift, accum); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp new file mode 100644 index 0000000000..9b5bd8b663 --- /dev/null +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" + +#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "arm_compute/core/Helpers.h" + +using namespace arm_compute; + +void CLActivationLayer::configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, act_info); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLArithmeticAddition.cpp b/src/runtime/CL/functions/CLArithmeticAddition.cpp new file mode 100644 index 0000000000..36bff4285c --- /dev/null +++ b/src/runtime/CL/functions/CLArithmeticAddition.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" + +#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLArithmeticAddition::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, policy); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLArithmeticSubtraction.cpp b/src/runtime/CL/functions/CLArithmeticSubtraction.cpp new file mode 100644 index 0000000000..97f0a1caf4 --- /dev/null +++ b/src/runtime/CL/functions/CLArithmeticSubtraction.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" + +#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLArithmeticSubtraction::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, policy); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp new file mode 100644 index 0000000000..3df673c6a6 --- /dev/null +++ b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLBatchNormalizationLayer::CLBatchNormalizationLayer() + : _norm_kernel() +{ +} + +void CLBatchNormalizationLayer::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon) +{ + _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon); +} + +void CLBatchNormalizationLayer::run() +{ + CLScheduler::get().enqueue(_norm_kernel, true); +} diff --git a/src/runtime/CL/functions/CLBitwiseAnd.cpp b/src/runtime/CL/functions/CLBitwiseAnd.cpp new file mode 100644 index 0000000000..7c85043206 --- /dev/null +++ b/src/runtime/CL/functions/CLBitwiseAnd.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" + +#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLBitwiseAnd::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLBitwiseNot.cpp b/src/runtime/CL/functions/CLBitwiseNot.cpp new file mode 100644 index 0000000000..17ae5dea3c --- /dev/null +++ b/src/runtime/CL/functions/CLBitwiseNot.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h" + +#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLBitwiseNot::configure(const ICLTensor *input, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLBitwiseOr.cpp b/src/runtime/CL/functions/CLBitwiseOr.cpp new file mode 100644 index 0000000000..c84a279bae --- /dev/null +++ b/src/runtime/CL/functions/CLBitwiseOr.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h" + +#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLBitwiseOr::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLBitwiseXor.cpp b/src/runtime/CL/functions/CLBitwiseXor.cpp new file mode 100644 index 0000000000..fd49c7d818 --- /dev/null +++ b/src/runtime/CL/functions/CLBitwiseXor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h" + +#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLBitwiseXor::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLBox3x3.cpp b/src/runtime/CL/functions/CLBox3x3.cpp new file mode 100644 index 0000000000..8de6807c73 --- /dev/null +++ b/src/runtime/CL/functions/CLBox3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLBox3x3.h" + +#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLBox3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, 1, border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp new file mode 100644 index 0000000000..1d018b8347 --- /dev/null +++ b/src/runtime/CL/functions/CLCannyEdge.cpp @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLCannyEdge.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLSobel3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel5x5.h" +#include "arm_compute/runtime/CL/functions/CLSobel7x7.h" + +using namespace arm_compute; + +CLCannyEdge::CLCannyEdge() + : _sobel(nullptr), _gradient(), _border_mag_gradient(), _non_max_suppr(), _edge_trace(), _gx(), _gy(), _mag(), _phase(), _nonmax(), _visited(), _recorded(), _l1_list_counter(), _l1_stack() +{ +} + +void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type)); + ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr); + + const unsigned int L1_hysteresis_stack_size = 8; + const TensorShape shape = input->info()->tensor_shape(); + + TensorInfo gradient_info; + TensorInfo info; + + // Initialize images + if(gradient_size < 7) + { + gradient_info.init(shape, 1, arm_compute::DataType::S16); + info.init(shape, 1, arm_compute::DataType::U16); + } + else + { + gradient_info.init(shape, 1, arm_compute::DataType::S32); + info.init(shape, 1, arm_compute::DataType::U32); + } + + _gx.allocator()->init(gradient_info); + _gy.allocator()->init(gradient_info); + _mag.allocator()->init(info); + _nonmax.allocator()->init(info); + + TensorInfo info_u8(shape, 1, arm_compute::DataType::U8); + _phase.allocator()->init(info_u8); + _l1_list_counter.allocator()->init(info_u8); + + TensorInfo info_u32(shape, 1, arm_compute::DataType::U32); + _visited.allocator()->init(info_u32); + _recorded.allocator()->init(info_u32); + + TensorShape shape_l1_stack = input->info()->tensor_shape(); + shape_l1_stack.set(0, input->info()->dimension(0) * L1_hysteresis_stack_size); + TensorInfo info_s32(shape_l1_stack, 1, arm_compute::DataType::S32); + _l1_stack.allocator()->init(info_s32); + + // Configure/Init sobelNxN + if(gradient_size == 3) + { + 
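+ // The gradient size chosen here also drove the intermediate formats initialised above: 3x3 and 5x5 Sobel produce
+ // S16 gradients with a U16 magnitude image, while 7x7 needs the S32/U32 variants. Each branch below simply
+ // instantiates the matching CLSobelNxN function and hands it to _sobel, so run() does not care which one was picked.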
auto k = arm_compute::cpp14::make_unique<CLSobel3x3>(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + } + else if(gradient_size == 5) + { + auto k = arm_compute::cpp14::make_unique<CLSobel5x5>(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + } + else if(gradient_size == 7) + { + auto k = arm_compute::cpp14::make_unique<CLSobel7x7>(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + } + else + { + ARM_COMPUTE_ERROR("Gradient size %d not supported", gradient_size); + } + + // Configure gradient + _gradient.configure(&_gx, &_gy, &_mag, &_phase, norm_type); + + // Configure non-maxima suppression + _non_max_suppr.configure(&_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED); + + // Fill border around magnitude image as non-maxima suppression will access + // it. If border mode is undefined filling the border is a nop. + _border_mag_gradient.configure(&_mag, _non_max_suppr.border_size(), border_mode, constant_border_value); + + // Configure edge tracing + _edge_trace.configure(&_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter); + + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); + _phase.allocator()->allocate(); + _mag.allocator()->allocate(); + _visited.allocator()->allocate(); + _recorded.allocator()->allocate(); + _l1_stack.allocator()->allocate(); + _l1_list_counter.allocator()->allocate(); + _nonmax.allocator()->allocate(); +} + +void CLCannyEdge::run() +{ + // Run sobel + _sobel->run(); + + // Run phase and magnitude calculation + CLScheduler::get().enqueue(_gradient, false); + + // Fill border before non-maxima suppression. Nop for border mode undefined. + CLScheduler::get().enqueue(_border_mag_gradient, false); + + // Run non-maxima suppression + _nonmax.clear(CLScheduler::get().queue()); + CLScheduler::get().enqueue(_non_max_suppr, false); + + // Clear temporary structures and run edge trace + _visited.clear(CLScheduler::get().queue()); + _recorded.clear(CLScheduler::get().queue()); + _l1_list_counter.clear(CLScheduler::get().queue()); + _l1_stack.clear(CLScheduler::get().queue()); + CLScheduler::get().enqueue(_edge_trace, true); +} diff --git a/src/runtime/CL/functions/CLChannelCombine.cpp b/src/runtime/CL/functions/CLChannelCombine.cpp new file mode 100644 index 0000000000..79a3676bd7 --- /dev/null +++ b/src/runtime/CL/functions/CLChannelCombine.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLChannelCombine.h" + +#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLChannelCombine::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(plane0, plane1, plane2, plane3, output); + _kernel = std::move(k); +} + +void CLChannelCombine::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(plane0, plane1, plane2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLChannelExtract.cpp b/src/runtime/CL/functions/CLChannelExtract.cpp new file mode 100644 index 0000000000..2c6174b9ee --- /dev/null +++ b/src/runtime/CL/functions/CLChannelExtract.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLChannelExtract.h" + +#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLChannelExtract::configure(const ICLTensor *input, Channel channel, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, channel, output); + _kernel = std::move(k); +} + +void CLChannelExtract::configure(const ICLMultiImage *input, Channel channel, ICLImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, channel, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLColorConvert.cpp b/src/runtime/CL/functions/CLColorConvert.cpp new file mode 100644 index 0000000000..2fe465aeb8 --- /dev/null +++ b/src/runtime/CL/functions/CLColorConvert.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLColorConvert.h" + +#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLColorConvert::configure(const ICLTensor *input, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void CLColorConvert::configure(const ICLImage *input, ICLMultiImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void CLColorConvert::configure(const ICLMultiImage *input, ICLImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void CLColorConvert::configure(const ICLMultiImage *input, ICLMultiImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLConvolution.cpp b/src/runtime/CL/functions/CLConvolution.cpp new file mode 100644 index 0000000000..21b5d47679 --- /dev/null +++ b/src/runtime/CL/functions/CLConvolution.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLConvolution.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include <utility> + +using namespace arm_compute; + +void CLConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique<CLConvolution3x3Kernel>(); + k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} + +template <unsigned int matrix_size> +CLConvolutionSquare<matrix_size>::CLConvolutionSquare() + : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() +{ +} + +template <unsigned int matrix_size> +void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(conv == nullptr); + int16_t conv_col[matrix_size]; + int16_t conv_row[matrix_size]; + _is_separable = separate_matrix(conv, conv_col, conv_row, matrix_size); + + if(_is_separable) + { + std::pair<DataType, DataType> type_pair = data_type_for_convolution(conv_col, conv_row, matrix_size); + _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, type_pair.first)); + + if(scale == 0) + { + scale = calculate_matrix_scale(conv, matrix_size); + } + + _kernel_hor.configure(input, &_tmp, conv_row, border_mode == BorderMode::UNDEFINED); + _kernel_vert.configure(&_tmp, output, conv_col, scale, border_mode == BorderMode::UNDEFINED, type_pair.second); + _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + + // Allocate intermediate buffer + _tmp.allocator()->allocate(); + } + else + { + _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value)); + } +} + +template <unsigned int matrix_size> +void CLConvolutionSquare<matrix_size>::run() +{ + CLScheduler::get().enqueue(_border_handler); + + if(_is_separable) + { + CLScheduler::get().enqueue(_kernel_hor, false); + CLScheduler::get().enqueue(_kernel_vert); + } + else + { + CLScheduler::get().enqueue(_kernel); + } +} + +template class arm_compute::CLConvolutionSquare<5>; +template class arm_compute::CLConvolutionSquare<7>; +template class arm_compute::CLConvolutionSquare<9>; + +void CLConvolutionRectangle::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique<CLConvolutionRectangleKernel>(); + k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp new file mode 100644 index 0000000000..f0bbc3514f --- /dev/null +++ 
b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include +#include + +using namespace arm_compute; + +CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() + : _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) +{ +} + +void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases, output); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + + const bool _has_bias = (biases != nullptr); + + _transpose1xW = transpose1xW; + + if(transpose1xW) + { + // Create tensor to store the reshaped weights + const unsigned int mat_weights_cols = weights->info()->dimension(3); + const unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + (_has_bias ? 
1 : 0); + TensorShape shape_wr(mat_weights_cols, mat_weights_rows); + const DataType dt = weights->info()->data_type(); + TensorInfo info_wr(shape_wr, 1, dt); + + _weights_reshaped.allocator()->init(info_wr); + _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); + _weights_transposed_kernel.configure(&_weights_reshaped, output); + _weights_reshaped.allocator()->allocate(); + } + else + { + _weights_reshape_kernel.configure(weights, biases, output); + } +} + +void CLConvolutionLayerReshapeWeights::run() +{ + cl::CommandQueue q = CLScheduler::get().queue(); + CLScheduler::get().enqueue(_weights_reshape_kernel); + if(_transpose1xW) + { + CLScheduler::get().enqueue(_weights_transposed_kernel); + } +} + +CLConvolutionLayer::CLConvolutionLayer() + : _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), + _weights_transposed(), _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) +{ +} + +void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + + _has_bias = (biases != nullptr); + _are_weights_reshaped = weights_info.are_reshaped(); + + // Get parameters for conv_info + unsigned int stride_x = 0; + unsigned int stride_y = 0; + unsigned int pad_x = 0; + unsigned int pad_y = 0; + std::tie(stride_x, stride_y) = conv_info.stride(); + std::tie(pad_x, pad_y) = conv_info.pad(); + + // Get convolved dimensions + unsigned int conv_w = 0; + unsigned int conv_h = 0; + + const unsigned int kernel_width = _are_weights_reshaped ? weights_info.kernel_size() : weights->info()->dimension(0); + std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, + stride_x, stride_y, pad_x, pad_y, conv_info.round()); + ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); + + // Check if its a "fully connected" convolution + _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); + + // Create tensor to store the reshaped weights + size_t mat_weights_cols = weights->info()->dimension(3); + size_t mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + ((_has_bias) ? 
1 : 0); + if(_are_weights_reshaped) + { + mat_weights_cols = output->info()->dimension(2); + const unsigned int quarter_reshaped_cols = weights->info()->dimension(0) / 4; + mat_weights_rows = (_has_bias ? 1 + quarter_reshaped_cols : quarter_reshaped_cols); + } + else + { + if(_is_fully_connected_convolution) + { + // Create tensor to store the reshaped weights + TensorShape shape_wr(mat_weights_cols, mat_weights_rows); + TensorInfo info_wr(shape_wr, 1, weights->info()->data_type()); + _weights_reshaped.allocator()->init(info_wr); + _reshape_weights.configure(weights, biases, &_weights_reshaped, false); + weights = &_weights_reshaped; + } + else + { + // Create tensor to store transposed weights + TensorShape shape_wt(mat_weights_rows * 4, static_cast(std::ceil(mat_weights_cols / 4.f))); + TensorInfo info_wt(shape_wt, 1, weights->info()->data_type()); + _weights_transposed.allocator()->init(info_wt); + _reshape_weights.configure(weights, biases, &_weights_transposed, true); + weights = &_weights_transposed; + } + } + // Create tensor to store im2col reshaped inputs + const size_t mat_input_cols = mat_weights_rows; + const size_t mat_input_rows = conv_w * conv_h; + TensorShape shape_im2col = input->info()->tensor_shape(); + shape_im2col.set(0, mat_input_cols); + shape_im2col.set(1, mat_input_rows); + shape_im2col.set(2, 1); + _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, input->info()->data_type())); + + // Create tensor (interleave) to prepare input tensor for GEMM + if(!_is_fully_connected_convolution) + { + TensorShape shape_interleaved = shape_im2col; + shape_interleaved.set(0, shape_interleaved.x() * 4); + shape_interleaved.set(1, std::ceil(static_cast(shape_interleaved.y()) / 4.f)); + _input_interleaved_reshaped.allocator()->init(TensorInfo(shape_interleaved, 1, input->info()->data_type())); + } + + // Create GEMM output tensor + TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape(); + shape_gemm.set(0, mat_weights_cols); + shape_gemm.set(1, mat_input_rows); + _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); + + // Configure kernels + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); + + if(_is_fully_connected_convolution) + { + _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f); + } + else + { + _input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped); + _mm_kernel.configure(&_input_interleaved_reshaped, weights, &_gemm_output, 1.0f); + } + + if(!_are_weights_reshaped) + { + if(!_is_fully_connected_convolution) + { + _weights_transposed.allocator()->allocate(); + } + else + { + _weights_reshaped.allocator()->allocate(); + } + } + + _input_im2col_reshaped.allocator()->allocate(); + if(!_is_fully_connected_convolution) + { + _input_interleaved_reshaped.allocator()->allocate(); + } + _gemm_output.allocator()->allocate(); +} + +void CLConvolutionLayer::run() +{ + // Run weights reshaping (Runs once for every configure) + if(!_are_weights_reshaped) + { + _are_weights_reshaped = true; + _reshape_weights.run(); + } + + // Run input reshaping + CLScheduler::get().enqueue(_input_im2col_kernel); + if(!_is_fully_connected_convolution) + { + CLScheduler::get().enqueue(_input_interleave_kernel); + } + + // Runs matrix multiply on reshaped matrices + CLScheduler::get().enqueue(_mm_kernel); + + // Reshape 
output matrix + CLScheduler::get().enqueue(_output_col2im_kernel, false); +} diff --git a/src/runtime/CL/functions/CLDepthConcatenate.cpp b/src/runtime/CL/functions/CLDepthConcatenate.cpp new file mode 100644 index 0000000000..d967d9865f --- /dev/null +++ b/src/runtime/CL/functions/CLDepthConcatenate.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLDepthConcatenate::CLDepthConcatenate() + : _inputs_vector(), _concat_kernels_vector(), _border_handlers_vector(), _num_inputs(0) +{ +} + +void CLDepthConcatenate::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); + + _num_inputs = inputs_vector.size(); + + unsigned int depth_offset = 0; + + _concat_kernels_vector = arm_compute::cpp14::make_unique<CLDepthConcatenateKernel[]>(_num_inputs); + _border_handlers_vector = arm_compute::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs); + + for(unsigned int i = 0; i < _num_inputs; i++) + { + _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); + _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0)); + + depth_offset += inputs_vector.at(i)->info()->dimension(2); + } +} + +void CLDepthConcatenate::run() +{ + cl::CommandQueue q = CLScheduler::get().queue(); + + for(unsigned i = 0; i < _num_inputs; i++) + { + CLScheduler::get().enqueue(_border_handlers_vector[i], false); + CLScheduler::get().enqueue(_concat_kernels_vector[i], true); + } +} diff --git a/src/runtime/CL/functions/CLDepthConvert.cpp b/src/runtime/CL/functions/CLDepthConvert.cpp new file mode 100644 index 0000000000..edcd4928ab --- /dev/null +++ b/src/runtime/CL/functions/CLDepthConvert.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" + +#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLDepthConvert::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, policy, shift); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLDerivative.cpp b/src/runtime/CL/functions/CLDerivative.cpp new file mode 100644 index 0000000000..c51cb4c333 --- /dev/null +++ b/src/runtime/CL/functions/CLDerivative.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLDerivative.h" + +#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLDerivative::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, 1, border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLDilate.cpp b/src/runtime/CL/functions/CLDilate.cpp new file mode 100644 index 0000000000..345f47763c --- /dev/null +++ b/src/runtime/CL/functions/CLDilate.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLDilate.h" + +#include "arm_compute/core/CL/kernels/CLDilateKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLDilate::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, 1, border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLEqualizeHistogram.cpp b/src/runtime/CL/functions/CLEqualizeHistogram.cpp new file mode 100644 index 0000000000..3b182d31b6 --- /dev/null +++ b/src/runtime/CL/functions/CLEqualizeHistogram.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h" + +#include "arm_compute/core/CL/ICLDistribution1D.h" +#include "arm_compute/core/CL/ICLLut.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include +#include +#include +#include + +using namespace arm_compute; + +namespace +{ +void calculate_cum_dist_and_lut(CLDistribution1D &dist, CLDistribution1D &cum_dist, CLLut &lut) +{ + dist.map(true); + cum_dist.map(true); + lut.map(true); + + const uint32_t *dist_ptr = dist.buffer(); + uint32_t *cum_dist_ptr = cum_dist.buffer(); + uint8_t *lut_ptr = lut.buffer(); + + ARM_COMPUTE_ERROR_ON(dist_ptr == nullptr); + ARM_COMPUTE_ERROR_ON(cum_dist_ptr == nullptr); + ARM_COMPUTE_ERROR_ON(lut_ptr == nullptr); + + // Calculate cumulative distribution + std::partial_sum(dist_ptr, dist_ptr + 256, cum_dist_ptr); + + // Get the number of pixels that have the lowest value in the input image + const uint32_t num_lowest_pixels = *std::find_if(dist_ptr, dist_ptr + 256, [](const uint32_t &v) + { + return v > 0; + }); + const size_t image_size = cum_dist_ptr[255]; + + if(image_size == num_lowest_pixels) + { + std::iota(lut_ptr, lut_ptr + 256, 0); + } + else + { + const float diff = image_size - num_lowest_pixels; + + for(size_t i = 0; i < 256; ++i) + { + lut_ptr[i] = lround((cum_dist_ptr[i] - num_lowest_pixels) / diff * 255.f); + } + } + + dist.unmap(); + cum_dist.unmap(); + lut.unmap(); +} +} // namespace + +CLEqualizeHistogram::CLEqualizeHistogram() + : _histogram_kernel(), _border_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8) +{ +} + +void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output) +{ + _histogram_kernel.configure(input, &_hist); + _border_histogram_kernel.configure(input, &_hist); + _map_histogram_kernel.configure(input, &_cd_lut, output); +} + +void CLEqualizeHistogram::run() +{ + // Calculate histogram of input. + CLScheduler::get().enqueue(_histogram_kernel, false); + + // Calculate remaining pixels when image is not multiple of the elements of histogram kernel + CLScheduler::get().enqueue(_border_histogram_kernel, false); + + // Calculate cumulative distribution of histogram and create LUT. 
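+ // calculate_cum_dist_and_lut() above maps the distributions and LUT to the host and builds the equalisation table as
+ // lut[i] = lround((cdf[i] - cdf_min) / (num_pixels - cdf_min) * 255), where cdf_min is the count of the lowest
+ // populated bin; when the image holds a single grey level it falls back to the identity LUT (std::iota).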
+ calculate_cum_dist_and_lut(_hist, _cum_dist, _cd_lut); + + // Map input to output using created LUT. + CLScheduler::get().enqueue(_map_histogram_kernel); +} diff --git a/src/runtime/CL/functions/CLErode.cpp b/src/runtime/CL/functions/CLErode.cpp new file mode 100644 index 0000000000..b4c50e465a --- /dev/null +++ b/src/runtime/CL/functions/CLErode.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLErode.h" + +#include "arm_compute/core/CL/kernels/CLErodeKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLErode::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, 1, border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLFastCorners.cpp b/src/runtime/CL/functions/CLFastCorners.cpp new file mode 100644 index 0000000000..d2903fb849 --- /dev/null +++ b/src/runtime/CL/functions/CLFastCorners.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLFastCorners.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include <algorithm> +#include <cstring> + +using namespace arm_compute; + +CLFastCorners::CLFastCorners() + : _fast_corners_kernel(), + _suppr_func(), + _copy_array_kernel(), + _output(), + _suppr(), + _win(), + _non_max(false), + _num_corners(nullptr), + _num_buffer(), + _corners(nullptr), + _constant_border_value(0) +{ +} + +void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonmax_suppression, CLKeyPointArray *const corners, + unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_ERROR_ON(BorderMode::UNDEFINED != border_mode); + ARM_COMPUTE_ERROR_ON(nullptr == corners); + ARM_COMPUTE_ERROR_ON(threshold < 1 || threshold > 255); + + TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::U8); + _output.allocator()->init(tensor_info); + + _non_max = nonmax_suppression; + _num_corners = num_corners; + _corners = corners; + _num_buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int)); + _constant_border_value = constant_border_value; + + const bool update_number = (nullptr != _num_corners); + + _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, border_mode); + + if(!_non_max) + { + _copy_array_kernel.configure(&_output, update_number, corners, &_num_buffer); + } + else + { + _suppr.allocator()->init(tensor_info); + + _suppr_func.configure(&_output, &_suppr, border_mode); + _copy_array_kernel.configure(&_suppr, update_number, corners, &_num_buffer); + + _suppr.allocator()->allocate(); + } + + // Allocate intermediate tensors + _output.allocator()->allocate(); +} + +void CLFastCorners::run() +{ + cl::CommandQueue q = CLScheduler::get().queue(); + + if(_non_max) + { + ARM_COMPUTE_ERROR_ON_MSG(_output.cl_buffer().get() == nullptr, "Unconfigured function"); + const auto out_buffer = static_cast<unsigned char *>(q.enqueueMapBuffer(_output.cl_buffer(), CL_TRUE, CL_MAP_WRITE, 0, _output.info()->total_size())); + memset(out_buffer, 0, _output.info()->total_size()); + q.enqueueUnmapMemObject(_output.cl_buffer(), out_buffer); + } + + CLScheduler::get().enqueue(_fast_corners_kernel, false); + + if(_non_max) + { + _suppr_func.run(); + } + + CLScheduler::get().enqueue(_copy_array_kernel, false); + + unsigned int get_num_corners = 0; + q.enqueueReadBuffer(_num_buffer, CL_TRUE, 0, sizeof(unsigned int), &get_num_corners); + + size_t corner_size = std::min(static_cast<size_t>(get_num_corners), _corners->max_num_values()); + + _corners->resize(corner_size); + + if(_num_corners != nullptr) + { + *_num_corners = get_num_corners; + } + + q.flush(); +} diff --git a/src/runtime/CL/functions/CLFillBorder.cpp b/src/runtime/CL/functions/CLFillBorder.cpp new file mode 100644 index 0000000000..9e59b771d8 --- /dev/null +++ b/src/runtime/CL/functions/CLFillBorder.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLFillBorder.h" + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLFillBorder::configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(tensor, border_width, border_mode, constant_border_value); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp new file mode 100644 index 0000000000..57d57d517f --- /dev/null +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" + +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include +#include + +using namespace arm_compute; + +CLFullyConnectedLayerReshapeWeights::CLFullyConnectedLayerReshapeWeights() + : _transpose_kernel(), _transpose1xW_kernel(), _transpose_output(), _transpose_weights(false), _is_batched_fc_layer(false) +{ +} + +void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output, bool transpose_weights, bool is_batched_fc_layer) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON(output == nullptr); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2); + ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false)); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + _transpose_weights = transpose_weights; + _is_batched_fc_layer = is_batched_fc_layer; + + // Check if we need to transpose the weights + if(_transpose_weights) + { + if(_is_batched_fc_layer) + { + // Initialize the output tensor for transpose + TensorShape shape_transposed(input->info()->dimension(1), input->info()->dimension(0)); + _transpose_output.allocator()->init(TensorInfo(shape_transposed, 1, dt, fixed_point_position)); + _transpose_kernel.configure(input, &_transpose_output); + + // Configure transpose 1xW kernel + _transpose1xW_kernel.configure(&_transpose_output, output); + + // Allocate temporary tensor used for transposing the weights + _transpose_output.allocator()->allocate(); + } + else + { + _transpose_kernel.configure(input, output); + } + } + else + { + if(_is_batched_fc_layer) + { + // Configure transpose 1xW kernel + _transpose1xW_kernel.configure(input, output); + } + else + { + ARM_COMPUTE_ERROR("Configuration transpose_weights=false & is_batched_fc_layer=false not supported"); + } + } +} + +void CLFullyConnectedLayerReshapeWeights::run() +{ + if(_transpose_weights) + { + CLScheduler::get().enqueue(_transpose_kernel, _is_batched_fc_layer); + } + if(_is_batched_fc_layer) + { + CLScheduler::get().enqueue(_transpose1xW_kernel); + } +} + +CLFullyConnectedLayer::CLFullyConnectedLayer() + : _im2col_kernel(), _reshape_weights_kernel(), _interleave4x4_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _interleave4x4_output(), _reshape_weights_output(), + _are_weights_reshaped(true), _is_fc_after_conv(true), _is_batched_fc_layer(false), _accumulate_biases(false) +{ +} + +void CLFullyConnectedLayer::configure_conv_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2) * (16 / weights->info()->element_size()))); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + // If the fully connected layer is called after a convolution layer, the input tensor must be linearized + + // Initialize output tensor for im2col + TensorShape shape_im2col; + shape_im2col.set(0, input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)); + shape_im2col.set(1, input->info()->dimension(3)); + shape_im2col.set(2, input->info()->dimension(4)); + shape_im2col.set(3, input->info()->dimension(5)); + 
_im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + + // Initialize output tensor for interleave 4x4 + TensorShape shape_interleaved = _im2col_output.info()->tensor_shape(); + shape_interleaved.set(0, shape_interleaved.x() * 4); + shape_interleaved.set(1, std::ceil(static_cast(shape_interleaved.y()) / 4)); + _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + + // Configure im2col kernel + _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + + // Configure interleave4x4 kernel + _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_interleave4x4_output, weights, output, 1.0f); + + // Allocate the tensors once all the configure methods have been called + _im2col_output.allocator()->allocate(); + _interleave4x4_output.allocator()->allocate(); +} + +void CLFullyConnectedLayer::configure_fc_fc_wb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output) +{ + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + // Initialize output tensor for interleave 4x4 + TensorShape shape_interleaved = input->info()->tensor_shape(); + shape_interleaved.set(0, shape_interleaved.x() * 4); + shape_interleaved.set(1, std::ceil(static_cast(shape_interleaved.y()) / 4)); + _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + + // Configure interleave4x4 kernel + _interleave4x4_kernel.configure(input, &_interleave4x4_output); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_interleave4x4_output, weights, output, 1.0f); + + // Allocate the tensors once all the configure methods have been called + _interleave4x4_output.allocator()->allocate(); +} + +void CLFullyConnectedLayer::configure_conv_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + // If the fully connected layer is called after a convolution layer, the input tensor must be linearized + + // Initialize output tensor for im2col + TensorShape shape_im2col; + shape_im2col.set(0, input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)); + shape_im2col.set(1, 1); + _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + + // Configure im2col kernel + _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_im2col_output, weights, output, 1.0f); + + // Allocate the output tensor for im2col once all the configure methods have been called + _im2col_output.allocator()->allocate(); +} + +void CLFullyConnectedLayer::configure_fc_fc_nb(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); + + // Configure matrix multiply kernel + _mm_kernel.configure(input, weights, output, 1.0f); +} + +void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor 
*output, bool transpose_weights, bool are_weights_reshaped) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + _are_weights_reshaped = are_weights_reshaped; + _is_fc_after_conv = true; + _is_batched_fc_layer = false; + _accumulate_biases = false; + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + + _accumulate_biases = true; + + // Configure accumulate biases kernel + _accumulate_biases_kernel.configure(output, biases); + } + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + // Check if we have a fully connected layer with batches + _is_batched_fc_layer = (output->info()->dimension(1) > 1); + + const ICLTensor *weights_to_use = weights; + + if(!are_weights_reshaped) + { + if((transpose_weights || _is_batched_fc_layer)) + { + weights_to_use = &_reshape_weights_output; + + if(transpose_weights) + { + if(_is_batched_fc_layer) + { + const float transpose_width = 16.0f / input->info()->element_size(); + TensorShape shape_wt(weights->info()->dimension(0) * static_cast(transpose_width), static_cast(std::ceil(weights->info()->dimension(1) / transpose_width))); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _reshape_weights_output.allocator()->init(info_wt); + } + else + { + TensorShape shape_wt(weights->info()->dimension(1), weights->info()->dimension(0)); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _reshape_weights_output.allocator()->init(info_wt); + } + } + else + { + ARM_COMPUTE_ERROR_ON(!_is_batched_fc_layer); + + const float transpose_width = 16.0f / input->info()->element_size(); + TensorShape shape_wt(weights->info()->dimension(1) * static_cast(transpose_width), static_cast(std::ceil(weights->info()->dimension(0) / transpose_width))); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _reshape_weights_output.allocator()->init(info_wt); + } + + // Reshape the weights + _reshape_weights_kernel.configure(weights, &_reshape_weights_output, transpose_weights, _is_batched_fc_layer); + } + } + + if(_is_batched_fc_layer) + { + _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); + + if(_is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer with batches + configure_conv_fc_wb(input, weights_to_use, output); + } + else + { + // Fully Connected layer after a Fully Connected Layer with batches + configure_fc_fc_wb(input, weights_to_use, output); + } + } + else + { + // In case of not batched fully connected layer, the weights will not be reshaped using transposed1xW + _is_fc_after_conv = ((weights_to_use->info()->dimension(1)) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))); + + if(_is_fc_after_conv) + { + // Fully Connected layer after a Convolution 
Layer without batches + configure_conv_fc_nb(input, weights_to_use, output); + } + else + { + // Fully Connected layer after a Fully Connected Layer without batches + configure_fc_fc_nb(input, weights_to_use, output); + } + } + + // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called + if(!are_weights_reshaped) + { + if(transpose_weights || _is_batched_fc_layer) + { + // Allocate the tensor for the weights reshaped + _reshape_weights_output.allocator()->allocate(); + } + } +} + +void CLFullyConnectedLayer::run() +{ + // Reshape of the weights (happens only once) + if(!_are_weights_reshaped) + { + _are_weights_reshaped = true; + _reshape_weights_kernel.run(); + } + + // Linearize input if it comes from a convolutional layer + if(_is_fc_after_conv) + { + CLScheduler::get().enqueue(_im2col_kernel, false); + } + + // Interleave input + if(_is_batched_fc_layer) + { + CLScheduler::get().enqueue(_interleave4x4_kernel, false); + } + + // Run matrix multiply + CLScheduler::get().enqueue(_mm_kernel, !_accumulate_biases); + + // Accumulate biases if provided + if(_accumulate_biases) + { + CLScheduler::get().enqueue(_accumulate_biases_kernel); + } +} diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp new file mode 100644 index 0000000000..7408054127 --- /dev/null +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +using namespace arm_compute; + +CLGEMM::CLGEMM() + : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _run_vector_matrix_multiplication(false), _run_addition(false) +{ +} + +void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32, DataType::F16); + + if(c != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(c, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A"); + ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix B"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(0) != output->info()->dimension(0), "The C matrix must have the same number of columns as the output matrix"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != output->info()->dimension(1), "The C matrix must have the same number of rows as the output matrix"); + } + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + + // Check if the first input tensor is a vector.
If so, all the kernels for reshaping the tensors can be skipped + if(a->info()->dimension(1) != 1) + { + _run_vector_matrix_multiplication = false; + + TensorShape shape_tmp_a = a->info()->tensor_shape(); + TensorShape shape_tmp_b = b->info()->tensor_shape(); + + shape_tmp_a.set(0, a->info()->dimension(0) * 4); + shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.0f)); + + if(DataType::F32 == a->info()->data_type()) + { + shape_tmp_b.set(0, b->info()->dimension(1) * 4); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 4.0f)); + } + else if(DataType::F16 == a->info()->data_type()) + { + shape_tmp_b.set(0, b->info()->dimension(1) * 8); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 8.0f)); + } + else + { + ARM_COMPUTE_ERROR("DataType not supported"); + } + + TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type()); + _tmp_a.allocator()->init(info_a); + + TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type()); + _tmp_b.allocator()->init(info_b); + + // Configure interleave kernel + _interleave_kernel.configure(a, &_tmp_a); + + // Configure transpose kernel + _transpose_kernel.configure(b, &_tmp_b); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_tmp_a, &_tmp_b, output, alpha); + + // Allocate intermediate tensors + _tmp_a.allocator()->allocate(); + _tmp_b.allocator()->allocate(); + } + else // The first input tensor is a vector + { + _run_vector_matrix_multiplication = true; + + // Configure the matrix multiply kernel + _mm_kernel.configure(a, b, output, alpha); + } + + // Configure matrix addition kernel + if(beta != 0 && c != nullptr) + { + _ma_kernel.configure(c, output, beta); + _run_addition = true; + } +} + +void CLGEMM::run() +{ + if(!_run_vector_matrix_multiplication) + { + // Run interleave kernel + CLScheduler::get().enqueue(_interleave_kernel, false); + + // Run transpose kernel + CLScheduler::get().enqueue(_transpose_kernel, false); + } + + // Run matrix multiply kernel + CLScheduler::get().enqueue(_mm_kernel, !_run_addition); + + // Run matrix addition kernel + if(_run_addition) + { + CLScheduler::get().enqueue(_ma_kernel); + } +} diff --git a/src/runtime/CL/functions/CLGEMMInterleave4x4.cpp b/src/runtime/CL/functions/CLGEMMInterleave4x4.cpp new file mode 100644 index 0000000000..9dc77156ef --- /dev/null +++ b/src/runtime/CL/functions/CLGEMMInterleave4x4.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
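To make the temporary shapes chosen in CLGEMM::configure() above concrete: interleaving packs 4 rows of A into one, and the 1xW transpose of B packs 16 / element_size values per output element, so the multiplier is 4 for F32 and 8 for F16. A small worked example of that arithmetic (standalone; shapes given as (dimension 0, dimension 1) pairs):

```cpp
#include <cmath>
#include <cstdio>
#include <utility>

using Shape2D = std::pair<unsigned int, unsigned int>; // (width, height) = (dim 0, dim 1)

// A interleaved 4x4: width grows by 4, height shrinks to ceil(height / 4).
Shape2D interleaved_shape(Shape2D a)
{
    return { a.first * 4, static_cast<unsigned int>(std::ceil(a.second / 4.0f)) };
}

// B transposed 1xW: mult = 16 / element_size values are packed per output row.
Shape2D transposed_1xw_shape(Shape2D b, unsigned int element_size)
{
    const unsigned int mult = 16 / element_size; // 4 for F32, 8 for F16
    return { b.second * mult, static_cast<unsigned int>(std::ceil(b.first / static_cast<float>(mult))) };
}

int main()
{
    const Shape2D a{ 64, 32 }; // 64 columns (K), 32 rows (M)
    const Shape2D b{ 96, 64 }; // 96 columns (N), 64 rows (K)

    const Shape2D ta = interleaved_shape(a);
    const Shape2D tb = transposed_1xw_shape(b, sizeof(float));

    std::printf("tmp_a = %u x %u, tmp_b = %u x %u\n", ta.first, ta.second, tb.first, tb.second);
    // Prints: tmp_a = 256 x 8, tmp_b = 256 x 24
    return 0;
}
```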
+ */ +#include "arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h" + +#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/Helpers.h" + +using namespace arm_compute; + +void CLGEMMInterleave4x4::configure(const ICLTensor *input, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLGEMMLowp.cpp b/src/runtime/CL/functions/CLGEMMLowp.cpp new file mode 100644 index 0000000000..45e011d8ce --- /dev/null +++ b/src/runtime/CL/functions/CLGEMMLowp.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLGEMMLowp.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLGEMMLowp::CLGEMMLowp() + : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _tmp_a(), _tmp_b() +{ +} + +void CLGEMMLowp::configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != output->info()->dimension(1), "The output matrix must have the same number of rows as the matrix A"); + ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != output->info()->dimension(0), "The output matrix must have the same number of columns as the matrix B"); + + // Create shape for interleaved temporary tensor + TensorShape shape_tmp_a = a->info()->tensor_shape(); + shape_tmp_a.set(0, a->info()->dimension(0) * 4); + shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.f)); + TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type()); + _tmp_a.allocator()->init(info_a); + + // Create shape for transposed temporary tensor + TensorShape shape_tmp_b = b->info()->tensor_shape(); + shape_tmp_b.set(0, b->info()->dimension(1) * 16); + shape_tmp_b.set(1, std::ceil(static_cast<float>(b->info()->dimension(0)) / 16)); + TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type()); + _tmp_b.allocator()->init(info_b); + + // Configure kernels + _interleave_kernel.configure(a, &_tmp_a); + _transpose_kernel.configure(b, &_tmp_b); + _mm_kernel.configure(&_tmp_a, &_tmp_b, output, a_offset, b_offset, output_offset, output_mult_int, shift); + + // Allocate intermediate buffers + _tmp_a.allocator()->allocate(); + _tmp_b.allocator()->allocate(); +} + +void CLGEMMLowp::run() +{ + /* Run interleave kernel */ + CLScheduler::get().enqueue(_interleave_kernel, false); + + /* Run transpose kernel */ + CLScheduler::get().enqueue(_transpose_kernel, false); + + /* Run matrix multiply kernel */ + CLScheduler::get().enqueue(_mm_kernel, false); +} diff --git a/src/runtime/CL/functions/CLGaussian3x3.cpp b/src/runtime/CL/functions/CLGaussian3x3.cpp new file mode 100644 index 0000000000..362a3fe920 --- /dev/null +++ b/src/runtime/CL/functions/CLGaussian3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited.
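The a_offset, b_offset, output_offset, output_mult_int and shift arguments passed to the CLGEMMLowp matrix multiply kernel above follow the common low-precision GEMM scheme: both U8 operands are offset before the multiply-accumulate, and the 32-bit accumulator is offset, scaled and shifted back down to U8. The scalar reference below spells out that assumed semantics; it illustrates the quantization arithmetic rather than copying the kernel:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// One output element of a low-precision GEMM (assumed semantics):
//   acc = sum_k (A[m][k] + a_offset) * (B[k][n] + b_offset)
//   out = clamp(((acc + output_offset) * output_mult_int) >> shift, 0, 255)
uint8_t gemmlowp_ref(const std::vector<uint8_t> &a_row, const std::vector<uint8_t> &b_col,
                     int32_t a_offset, int32_t b_offset,
                     int32_t output_offset, int32_t output_mult_int, int32_t shift)
{
    int32_t acc = 0;
    for(size_t k = 0; k < a_row.size(); ++k)
    {
        acc += (static_cast<int32_t>(a_row[k]) + a_offset) * (static_cast<int32_t>(b_col[k]) + b_offset);
    }
    const int32_t requantized = ((acc + output_offset) * output_mult_int) >> shift;
    return static_cast<uint8_t>(std::min(255, std::max(0, requantized)));
}
```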
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h" + +#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLGaussian3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLGaussian5x5.cpp b/src/runtime/CL/functions/CLGaussian5x5.cpp new file mode 100644 index 0000000000..e83a8fb857 --- /dev/null +++ b/src/runtime/CL/functions/CLGaussian5x5.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include + +using namespace arm_compute; + +CLGaussian5x5::CLGaussian5x5() + : _kernel_hor(), _kernel_vert(), _border_handler(), _tmp() +{ +} + +void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + + _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, DataType::U16)); + + _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); + _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); + _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + + // Allocate intermediate buffers + _tmp.allocator()->allocate(); +} + +void CLGaussian5x5::run() +{ + CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(_kernel_hor, false); + CLScheduler::get().enqueue(_kernel_vert); +} diff --git a/src/runtime/CL/functions/CLGaussianPyramid.cpp b/src/runtime/CL/functions/CLGaussianPyramid.cpp new file mode 100644 index 0000000000..8a4279e99b --- /dev/null +++ b/src/runtime/CL/functions/CLGaussianPyramid.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
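CLGaussian5x5 above can run as a horizontal pass into a U16 temporary followed by a vertical pass because the 5x5 Gaussian kernel is separable: assuming the usual binomial coefficients, it is the outer product of [1 4 6 4 1] with itself scaled by 1/256, and a horizontally filtered U8 row can reach at most 255 * 16 = 4080, which is why U16 is enough for _tmp. A quick check of the outer-product identity:

```cpp
#include <cstdio>

int main()
{
    const int g[5] = { 1, 4, 6, 4, 1 }; // 1D binomial kernel, coefficients sum to 16

    // The outer product gives the 5x5 Gaussian kernel; the overall scale is 1 / (16 * 16) = 1 / 256.
    for(int y = 0; y < 5; ++y)
    {
        for(int x = 0; x < 5; ++x)
        {
            std::printf("%3d ", g[y] * g[x]);
        }
        std::printf("\n");
    }
    return 0;
}
```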
+ */ +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" +#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" + +#include + +using namespace arm_compute; + +CLGaussianPyramid::CLGaussianPyramid() + : _input(nullptr), _pyramid(nullptr), _tmp() +{ +} + +CLGaussianPyramidHalf::CLGaussianPyramidHalf() + : _border_handler(), _horizontal_reduction(), _vertical_reduction() +{ +} + +void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(pyramid == nullptr); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_HALF != pyramid->info()->scale()); + + /* Get number of pyramid levels */ + const size_t num_levels = pyramid->info()->num_levels(); + + _input = input; + _pyramid = pyramid; + + if(num_levels > 1) + { + _border_handler = arm_compute::cpp14::make_unique(num_levels - 1); + _horizontal_reduction = arm_compute::cpp14::make_unique(num_levels - 1); + _vertical_reduction = arm_compute::cpp14::make_unique(num_levels - 1); + + // Apply half scale to the X dimension of the tensor shape + TensorShape tensor_shape = pyramid->info()->tensor_shape(); + tensor_shape.set(0, (pyramid->info()->width() + 1) * SCALE_PYRAMID_HALF); + + PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_HALF, tensor_shape, Format::U16); + + _tmp.init(pyramid_info); + + for(size_t i = 0; i < num_levels - 1; ++i) + { + /* Configure horizontal kernel */ + _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode == BorderMode::UNDEFINED); + + /* Configure vertical kernel */ + _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), border_mode == BorderMode::UNDEFINED); + + /* Configure border */ + _border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value)); + } + _tmp.allocate(); + } +} + +void CLGaussianPyramidHalf::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function"); + + /* Get number of pyramid levels */ + const size_t num_levels = _pyramid->info()->num_levels(); + + /* The first level of the pyramid has the input image */ + _pyramid->get_pyramid_level(0)->map(CLScheduler::get().queue(), true /* blocking */); + _input->map(CLScheduler::get().queue(), true /* blocking */); + _pyramid->get_pyramid_level(0)->copy_from(*_input); + _input->unmap(CLScheduler::get().queue()); + _pyramid->get_pyramid_level(0)->unmap(CLScheduler::get().queue()); + + for(unsigned int i = 0; i < num_levels 
- 1; ++i) + { + CLScheduler::get().enqueue(_border_handler[i], false); + CLScheduler::get().enqueue(_horizontal_reduction[i], false); + CLScheduler::get().enqueue(_vertical_reduction[i], false); + } +} + +CLGaussianPyramidOrb::CLGaussianPyramidOrb() + : _gauss5x5(), _scale_nearest() +{ +} + +void CLGaussianPyramidOrb::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_ORB != pyramid->info()->scale()); + + /* Get number of pyramid levels */ + const size_t num_levels = pyramid->info()->num_levels(); + + _input = input; + _pyramid = pyramid; + + if(num_levels > 1) + { + _gauss5x5 = arm_compute::cpp14::make_unique(num_levels - 1); + _scale_nearest = arm_compute::cpp14::make_unique(num_levels - 1); + + PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8); + + _tmp.init(pyramid_info); + + for(size_t i = 0; i < num_levels - 1; ++i) + { + /* Configure gaussian 5x5 */ + _gauss5x5[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value); + + /* Configure scale image kernel */ + _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), InterpolationPolicy::NEAREST_NEIGHBOR, border_mode == BorderMode::UNDEFINED); + } + + _tmp.allocate(); + } +} + +void CLGaussianPyramidOrb::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function"); + + /* Get number of pyramid levels */ + const size_t num_levels = _pyramid->info()->num_levels(); + + /* The first level of the pyramid has the input image */ + _pyramid->get_pyramid_level(0)->map(CLScheduler::get().queue(), true /* blocking */); + _input->map(CLScheduler::get().queue(), true /* blocking */); + _pyramid->get_pyramid_level(0)->copy_from(*_input); + _input->unmap(CLScheduler::get().queue()); + _pyramid->get_pyramid_level(0)->unmap(CLScheduler::get().queue()); + + for(unsigned int i = 0; i < num_levels - 1; ++i) + { + _gauss5x5[i].run(); + CLScheduler::get().enqueue(_scale_nearest[i]); + } +} diff --git a/src/runtime/CL/functions/CLHOGDescriptor.cpp b/src/runtime/CL/functions/CLHOGDescriptor.cpp new file mode 100644 index 0000000000..b1b5a03ac1 --- /dev/null +++ b/src/runtime/CL/functions/CLHOGDescriptor.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
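Both pyramid variants above produce level i with dimensions scaled by scale^i relative to the input, with SCALE_PYRAMID_HALF = 0.5 and SCALE_PYRAMID_ORB = 2^(-1/4), about 0.8409. A small sketch of the resulting level geometry; the rounding used here is illustrative and does not claim to reproduce PyramidInfo exactly:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
    const float scale = 0.5f; // SCALE_PYRAMID_HALF; an ORB pyramid would use ~0.8408964f
    const unsigned int num_levels = 4;

    float w = 640.0f;
    float h = 480.0f;

    for(unsigned int i = 0; i < num_levels; ++i)
    {
        std::printf("level %u: %u x %u\n", i, static_cast<unsigned int>(w), static_cast<unsigned int>(h));
        w = std::ceil(w * scale); // the next level is scaled down by 'scale'
        h = std::ceil(h * scale);
    }
    return 0;
}
```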
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLHOGDescriptor::CLHOGDescriptor() + : _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space() +{ +} + +void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == output); + ARM_COMPUTE_ERROR_ON(nullptr == hog); + + const HOGInfo *hog_info = hog->info(); + const size_t width = input->info()->dimension(Window::DimX); + const size_t height = input->info()->dimension(Window::DimY); + const size_t num_bins = hog_info->num_bins(); + + Size2D cell_size = hog_info->cell_size(); + + // Calculate number of cells along the x and y directions for the hog_space + const size_t num_cells_x = width / cell_size.width; + const size_t num_cells_y = height / cell_size.height; + + // TensorShape of the input image + const TensorShape &shape_img = input->info()->tensor_shape(); + + // TensorShape of the hog space + TensorShape shape_hog_space = input->info()->tensor_shape(); + shape_hog_space.set(Window::DimX, num_cells_x); + shape_hog_space.set(Window::DimY, num_cells_y); + + // Intitialize tensors for magnitude, phase and hog space + TensorInfo info_mag(shape_img, Format::S16); + _mag.allocator()->init(info_mag); + + TensorInfo info_phase(shape_img, Format::U8); + _phase.allocator()->init(info_phase); + + TensorInfo info_space(shape_hog_space, num_bins, DataType::F32); + _hog_space.allocator()->init(info_space); + + // Initialise gradient kernel + _gradient.configure(input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value); + + // Initialise orientation binning kernel + _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info()); + + // Initialize HOG norm kernel + _block_norm.configure(&_hog_space, output, hog->info()); + + // Allocate intermediate tensors + _mag.allocator()->allocate(); + _phase.allocator()->allocate(); + _hog_space.allocator()->allocate(); +} + +void CLHOGDescriptor::run() +{ + // Run gradient + _gradient.run(); + + // Run orientation binning + CLScheduler::get().enqueue(_orient_bin, false); + + // Run block normalization + CLScheduler::get().enqueue(_block_norm); +} \ No newline at end of file diff --git a/src/runtime/CL/functions/CLHOGDetector.cpp b/src/runtime/CL/functions/CLHOGDetector.cpp new file mode 100644 index 0000000000..8eb5e4251f --- /dev/null +++ b/src/runtime/CL/functions/CLHOGDetector.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. 
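To put numbers on the shapes set up in CLHOGDescriptor::configure() above, take the classic 64x128 detection window with 8x8 cells, 16x16 blocks, an 8x8 block stride and 9 bins (example geometry, not values taken from the library): the HOG space is 8x16 cells with 9 bins each, and the normalized descriptor ends up with 7 * 15 blocks of 4 cells, i.e. 3780 values. The bookkeeping, as standard HOG arithmetic:

```cpp
#include <cstdio>

int main()
{
    // Example geometry (assumed, Dalal-Triggs style); all sizes in pixels
    const unsigned int win_w = 64, win_h = 128;
    const unsigned int cell         = 8;
    const unsigned int block        = 16;
    const unsigned int block_stride = 8;
    const unsigned int num_bins     = 9;

    const unsigned int cells_x  = win_w / cell;                           // 8
    const unsigned int cells_y  = win_h / cell;                           // 16
    const unsigned int blocks_x = (win_w - block) / block_stride + 1;     // 7
    const unsigned int blocks_y = (win_h - block) / block_stride + 1;     // 15
    const unsigned int cells_per_block = (block / cell) * (block / cell); // 4

    std::printf("HOG space: %u x %u cells, %u bins per cell\n", cells_x, cells_y, num_bins);
    std::printf("descriptor: %u blocks x %u values = %u values\n",
                blocks_x * blocks_y, cells_per_block * num_bins,
                blocks_x * blocks_y * cells_per_block * num_bins); // 105 x 36 = 3780
    return 0;
}
```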
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLHOGDetector.h" + +#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include + +using namespace arm_compute; + +CLHOGDetector::CLHOGDetector() + : _hog_detector_kernel(), _detection_windows(nullptr), _num_detection_windows() +{ +} + +void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class) +{ + _detection_windows = detection_windows; + + // Allocate buffer for storing the number of detected objects + _num_detection_windows = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int)); + + // Configure HOGDetectorKernel + _hog_detector_kernel.configure(input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class); +} + +void CLHOGDetector::run() +{ + cl::CommandQueue q = CLScheduler::get().queue(); + + // Reset number of detections + const unsigned int init_num_detection_windows = _detection_windows->num_values(); + q.enqueueWriteBuffer(_num_detection_windows, CL_FALSE, 0, sizeof(unsigned int), &init_num_detection_windows); + + // Run CLHOGDetectorKernel + CLScheduler::get().enqueue(_hog_detector_kernel); + + // Read number of detections + unsigned int num_detection_windows = 0; + q.enqueueReadBuffer(_num_detection_windows, CL_TRUE, 0, sizeof(unsigned int), &num_detection_windows); + + // Update the number of values stored in _detection_windows + _detection_windows->resize(static_cast(num_detection_windows)); + + q.flush(); +} \ No newline at end of file diff --git a/src/runtime/CL/functions/CLHOGGradient.cpp b/src/runtime/CL/functions/CLHOGGradient.cpp new file mode 100644 index 0000000000..2387474358 --- /dev/null +++ b/src/runtime/CL/functions/CLHOGGradient.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. 
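CLHOGDetector::run() above, like CLFastCorners::run() earlier, relies on the same host-side pattern: a one-uint cl::Buffer acts as a counter that the kernel bumps atomically, and the host seeds it, enqueues the kernel, reads the final value back and resizes the output array to match. A stripped-down sketch of that pattern using the stock OpenCL C++ wrapper; the header choice and the enqueue_kernel callback are assumptions, since the library routes everything through CLScheduler and its own arm_compute/core/CL/OpenCL.h wrapper:

```cpp
#include <CL/cl2.hpp>

#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

// Runs a kernel that appends results and atomically increments a device-side counter,
// then returns a host-side vector sized to the number of results actually produced.
std::vector<int> run_with_counter(cl::Context &context, cl::CommandQueue &queue,
                                  const std::function<void(cl::Buffer &)> &enqueue_kernel,
                                  unsigned int initial_count, size_t max_results)
{
    cl::Buffer counter(context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));

    // Seed the counter (CLHOGDetector seeds it with the windows already stored in the array)
    queue.enqueueWriteBuffer(counter, CL_FALSE, 0, sizeof(unsigned int), &initial_count);

    // The kernel appends results and bumps the counter
    enqueue_kernel(counter);

    // Blocking read of the final count, then clamp to the destination capacity
    unsigned int count = 0;
    queue.enqueueReadBuffer(counter, CL_TRUE, 0, sizeof(unsigned int), &count);

    std::vector<int> results(std::min<size_t>(count, max_results));
    queue.flush();
    return results;
}
```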
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLHOGGradient.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLHOGGradient::CLHOGGradient() + : _derivative(), _mag_phase(), _gx(), _gy() +{ +} + +void CLHOGGradient::configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_magnitude, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_phase, 1, DataType::U8); + + const TensorShape &shape_img = input->info()->tensor_shape(); + + // Allocate image memory + TensorInfo info(shape_img, Format::S16); + _gx.allocator()->init(info); + _gy.allocator()->init(info); + + // Initialise derivate kernel + _derivative.configure(input, &_gx, &_gy, border_mode, constant_border_value); + + // Initialise magnitude/phase kernel + if(PhaseType::UNSIGNED == phase_type) + { + _mag_phase.configure(&_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED); + } + else + { + _mag_phase.configure(&_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED); + } + + // Allocate intermediate tensors + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); +} + +void CLHOGGradient::run() +{ + // Run derivative + _derivative.run(); + + // Run magnitude/phase kernel + CLScheduler::get().enqueue(_mag_phase); +} \ No newline at end of file diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp new file mode 100644 index 0000000000..b8f2224ac8 --- /dev/null +++ b/src/runtime/CL/functions/CLHOGMultiDetection.cpp @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2017 ARM Limited. 
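The magnitude/phase kernel configured above computes, per pixel, an L2 magnitude sqrt(gx^2 + gy^2) and a gradient orientation that is wrapped to [0, 180) degrees for PhaseType::UNSIGNED or left over [0, 360) for SIGNED. A scalar sketch of that math; how the kernel quantizes the angle into its U8 output is not reproduced here:

```cpp
#include <cmath>
#include <cstdint>

struct MagPhase
{
    int16_t magnitude; // same S16 range as the kernel output
    float   phase_deg; // the kernel stores a U8-quantized version of this angle
};

MagPhase mag_phase(int16_t gx, int16_t gy, bool unsigned_phase)
{
    MagPhase out{};

    // L2 norm of the gradient
    out.magnitude = static_cast<int16_t>(std::lround(std::sqrt(static_cast<float>(gx) * gx + static_cast<float>(gy) * gy)));

    // Orientation in degrees, mapped into [0, 360)
    float angle = std::atan2(static_cast<float>(gy), static_cast<float>(gx)) * 180.0f / 3.14159265f;
    if(angle < 0.0f)
    {
        angle += 360.0f;
    }
    // UNSIGNED phase folds a direction and its opposite into the same bin
    if(unsigned_phase && angle >= 180.0f)
    {
        angle -= 180.0f;
    }
    out.phase_deg = angle;
    return out;
}
```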
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/CL/CLArray.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +using namespace arm_compute; + +CLHOGMultiDetection::CLHOGMultiDetection() + : _gradient_kernel(), _orient_bin_kernel(), _block_norm_kernel(), _hog_detect_kernel(), _non_maxima_kernel(), _hog_space(), _hog_norm_space(), _detection_windows(), _mag(), _phase(), + _non_maxima_suppression(false), _num_orient_bin_kernel(0), _num_block_norm_kernel(0), _num_hog_detect_kernel(0) +{ +} + +void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode, + uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(multi_hog); + ARM_COMPUTE_ERROR_ON(nullptr == detection_windows); + ARM_COMPUTE_ERROR_ON(detection_window_strides->num_values() != multi_hog->num_models()); + + const size_t width = input->info()->dimension(Window::DimX); + const size_t height = input->info()->dimension(Window::DimY); + const TensorShape &shape_img = input->info()->tensor_shape(); + const size_t num_models = multi_hog->num_models(); + PhaseType phase_type = multi_hog->model(0)->info()->phase_type(); + + size_t prev_num_bins = multi_hog->model(0)->info()->num_bins(); + Size2D prev_cell_size = multi_hog->model(0)->info()->cell_size(); + Size2D prev_block_size = multi_hog->model(0)->info()->block_size(); + Size2D prev_block_stride = multi_hog->model(0)->info()->block_stride(); + + /* Check if CLHOGOrientationBinningKernel and CLHOGBlockNormalizationKernel kernels can be skipped for a specific HOG data-object + * + * 1) CLHOGOrientationBinningKernel and CLHOGBlockNormalizationKernel are skipped if the cell size and the number of bins don't change. + * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th + * 2) CLHOGBlockNormalizationKernel is skipped if the cell size, the number of bins and block size do not change. 
+ * Since "multi_hog" is sorted, it is enough to check the HOG descriptors at level "ith" and level "(i-1)th" + * + * @note Since the orientation binning and block normalization kernels can be skipped, we need to keep track of the input to process for each kernel + * with "input_orient_bin", "input_hog_detect" and "input_block_norm" + */ + std::vector<size_t> input_orient_bin; + std::vector<size_t> input_hog_detect; + std::vector<std::pair<size_t, size_t>> input_block_norm; + + input_orient_bin.push_back(0); + input_hog_detect.push_back(0); + input_block_norm.emplace_back(0, 0); + + for(size_t i = 1; i < num_models; ++i) + { + size_t cur_num_bins = multi_hog->model(i)->info()->num_bins(); + Size2D cur_cell_size = multi_hog->model(i)->info()->cell_size(); + Size2D cur_block_size = multi_hog->model(i)->info()->block_size(); + Size2D cur_block_stride = multi_hog->model(i)->info()->block_stride(); + + if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height)) + { + prev_num_bins = cur_num_bins; + prev_cell_size = cur_cell_size; + prev_block_size = cur_block_size; + prev_block_stride = cur_block_stride; + + // Compute orientation binning and block normalization kernels. Update input to process + input_orient_bin.push_back(i); + input_block_norm.emplace_back(i, input_orient_bin.size() - 1); + } + else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width) + || (cur_block_stride.height != prev_block_stride.height)) + { + prev_block_size = cur_block_size; + prev_block_stride = cur_block_stride; + + // Compute block normalization kernel. Update input to process + input_block_norm.emplace_back(i, input_orient_bin.size() - 1); + } + + // Update input to process for hog detector kernel + input_hog_detect.push_back(input_block_norm.size() - 1); + } + + _detection_windows = detection_windows; + _non_maxima_suppression = non_maxima_suppression; + _num_orient_bin_kernel = input_orient_bin.size(); // Number of CLHOGOrientationBinningKernel kernels to compute + _num_block_norm_kernel = input_block_norm.size(); // Number of CLHOGBlockNormalizationKernel kernels to compute + _num_hog_detect_kernel = input_hog_detect.size(); // Number of CLHOGDetector functions to compute + + _orient_bin_kernel = arm_compute::cpp14::make_unique<CLHOGOrientationBinningKernel[]>(_num_orient_bin_kernel); + _block_norm_kernel = arm_compute::cpp14::make_unique<CLHOGBlockNormalizationKernel[]>(_num_block_norm_kernel); + _hog_detect_kernel = arm_compute::cpp14::make_unique<CLHOGDetector[]>(_num_hog_detect_kernel); + _non_maxima_kernel = arm_compute::cpp14::make_unique<CPPDetectionWindowNonMaximaSuppressionKernel>(); + _hog_space = arm_compute::cpp14::make_unique<CLTensor[]>(_num_orient_bin_kernel); + _hog_norm_space = arm_compute::cpp14::make_unique<CLTensor[]>(_num_block_norm_kernel); + + // Allocate tensors for magnitude and phase + TensorInfo info_mag(shape_img, Format::S16); + _mag.allocator()->init(info_mag); + + TensorInfo info_phase(shape_img, Format::U8); + _phase.allocator()->init(info_phase); + + // Initialise gradient kernel + _gradient_kernel.configure(input, &_mag, &_phase, phase_type, border_mode, constant_border_value); + + // Configure CLTensor for the HOG space and orientation binning kernel + for(size_t i = 0; i < _num_orient_bin_kernel; ++i) + { + const size_t idx_multi_hog = input_orient_bin[i]; + + // Get the corresponding cell size and number of bins + const Size2D &cell = multi_hog->model(idx_multi_hog)->info()->cell_size(); + const size_t num_bins = multi_hog->model(idx_multi_hog)->info()->num_bins(); + + // Calculate number
of cells along the x and y directions for the hog_space + const size_t num_cells_x = width / cell.width; + const size_t num_cells_y = height / cell.height; + + // TensorShape of hog space + TensorShape shape_hog_space = input->info()->tensor_shape(); + shape_hog_space.set(Window::DimX, num_cells_x); + shape_hog_space.set(Window::DimY, num_cells_y); + + // Allocate HOG space + TensorInfo info_space(shape_hog_space, num_bins, DataType::F32); + _hog_space[i].allocator()->init(info_space); + + // Initialise orientation binning kernel + _orient_bin_kernel[i].configure(&_mag, &_phase, _hog_space.get() + i, multi_hog->model(idx_multi_hog)->info()); + } + + // Configure CLTensor for the normalized HOG space and block normalization kernel + for(size_t i = 0; i < _num_block_norm_kernel; ++i) + { + const size_t idx_multi_hog = input_block_norm[i].first; + const size_t idx_orient_bin = input_block_norm[i].second; + + // Allocate normalized HOG space + TensorInfo tensor_info(*(multi_hog->model(idx_multi_hog)->info()), width, height); + _hog_norm_space[i].allocator()->init(tensor_info); + + // Initialize block normalization kernel + _block_norm_kernel[i].configure(_hog_space.get() + idx_orient_bin, _hog_norm_space.get() + i, multi_hog->model(idx_multi_hog)->info()); + } + + detection_window_strides->map(CLScheduler::get().queue(), true); + + // Configure HOG detector kernel + for(size_t i = 0; i < _num_hog_detect_kernel; ++i) + { + const size_t idx_block_norm = input_hog_detect[i]; + + _hog_detect_kernel[i].configure(_hog_norm_space.get() + idx_block_norm, multi_hog->cl_model(i), detection_windows, detection_window_strides->at(i), threshold, i); + } + + detection_window_strides->unmap(CLScheduler::get().queue()); + + // Configure non maxima suppression kernel + _non_maxima_kernel->configure(_detection_windows, min_distance); + + // Allocate intermediate tensors + _mag.allocator()->allocate(); + _phase.allocator()->allocate(); + + for(size_t i = 0; i < _num_orient_bin_kernel; ++i) + { + _hog_space[i].allocator()->allocate(); + } + + for(size_t i = 0; i < _num_block_norm_kernel; ++i) + { + _hog_norm_space[i].allocator()->allocate(); + } +} + +void CLHOGMultiDetection::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_detection_windows == nullptr, "Unconfigured function"); + + // Reset detection window + _detection_windows->clear(); + + // Run gradient + _gradient_kernel.run(); + + // Run orientation binning kernel + for(size_t i = 0; i < _num_orient_bin_kernel; ++i) + { + CLScheduler::get().enqueue(*(_orient_bin_kernel.get() + i), false); + } + + // Run block normalization kernel + for(size_t i = 0; i < _num_block_norm_kernel; ++i) + { + CLScheduler::get().enqueue(*(_block_norm_kernel.get() + i), false); + } + + // Run HOG detector kernel + for(size_t i = 0; i < _num_hog_detect_kernel; ++i) + { + _hog_detect_kernel[i].run(); + } + + // Run non-maxima suppression kernel if enabled + if(_non_maxima_suppression) + { + // Map detection windows array before computing non maxima suppression + _detection_windows->map(CLScheduler::get().queue(), true); + _non_maxima_kernel->run(_non_maxima_kernel->window()); + _detection_windows->unmap(CLScheduler::get().queue()); + } +} \ No newline at end of file diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp new file mode 100644 index 0000000000..2db277fa4d --- /dev/null +++ b/src/runtime/CL/functions/CLHarrisCorners.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h" + +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLSobel3x3.h" +#include "arm_compute/runtime/CL/functions/CLSobel5x5.h" +#include "arm_compute/runtime/CL/functions/CLSobel7x7.h" +#include "arm_compute/runtime/ITensorAllocator.h" +#include "arm_compute/runtime/Scheduler.h" + +#include +#include + +using namespace arm_compute; + +CLHarrisCorners::CLHarrisCorners() + : _sobel(), _harris_score(), _non_max_suppr(), _candidates(), _sort_euclidean(), _border_gx(), _border_gy(), _gx(), _gy(), _score(), _nonmax(), _corners_list(), _num_corner_candidates(0), + _corners(nullptr) +{ +} + +void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist, + float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7)); + ARM_COMPUTE_ERROR_ON(nullptr == corners); + + _corners = corners; + + const TensorShape shape = input->info()->tensor_shape(); + const DataType dt = (gradient_size < 7) ? 
DataType::S16 : DataType::S32; + TensorInfo tensor_info(shape, 1, dt); + _gx.allocator()->init(tensor_info); + _gy.allocator()->init(tensor_info); + + TensorInfo info_f32(shape, 1, DataType::F32); + _score.allocator()->init(info_f32); + _nonmax.allocator()->init(info_f32); + + _corners_list = arm_compute::cpp14::make_unique(shape.x() * shape.y()); + + /* Set/init Sobel kernel accordingly with gradient_size */ + switch(gradient_size) + { + case 3: + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + break; + } + case 5: + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + break; + } + case 7: + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + break; + } + default: + ARM_COMPUTE_ERROR("Gradient size not implemented"); + } + + // Configure border filling before harris score + _border_gx.configure(&_gx, block_size / 2, border_mode, constant_border_value); + _border_gy.configure(&_gy, block_size / 2, border_mode, constant_border_value); + + // Normalization factor + const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size); + const float pow4_normalization_factor = pow(norm_factor, 4); + + // Set/init Harris Score kernel accordingly with block_size + _harris_score.configure(&_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + + // Init non-maxima suppression function + _non_max_suppr.configure(&_score, &_nonmax, border_mode == BorderMode::UNDEFINED); + + // Init corner candidates kernel + _candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates); + + // Init euclidean distance + _sort_euclidean.configure(_corners_list.get(), _corners, &_num_corner_candidates, min_dist); + + // Allocate intermediate buffers + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); + _score.allocator()->allocate(); + _nonmax.allocator()->allocate(); +} + +void CLHarrisCorners::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function"); + + // Init to 0 number of corner candidates + _num_corner_candidates = 0; + + // Run Sobel kernel + _sobel->run(); + + // Fill border before harris score kernel + CLScheduler::get().enqueue(_border_gx, false); + CLScheduler::get().enqueue(_border_gy, false); + + // Run harris score kernel + CLScheduler::get().enqueue(_harris_score, false); + + // Run non-maxima suppression + CLScheduler::get().enqueue(_non_max_suppr); + + // Run corner candidate kernel + _nonmax.map(true); + Scheduler::get().schedule(&_candidates, Window::DimY); + _nonmax.unmap(); + + _corners->map(CLScheduler::get().queue(), true); + _sort_euclidean.run(_sort_euclidean.window()); + _corners->unmap(CLScheduler::get().queue()); +} diff --git a/src/runtime/CL/functions/CLHistogram.cpp b/src/runtime/CL/functions/CLHistogram.cpp new file mode 100644 index 0000000000..eb543387f5 --- /dev/null +++ b/src/runtime/CL/functions/CLHistogram.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLHistogram.h" + +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLHistogram::CLHistogram() + : _kernel(), _kernel_border() +{ +} + +void CLHistogram::configure(const ICLImage *input, ICLDistribution1D *output) +{ + _kernel.configure(input, output); + _kernel_border.configure(input, output); +} + +void CLHistogram::run() +{ + CLScheduler::get().enqueue(_kernel, false); + CLScheduler::get().enqueue(_kernel_border); +} diff --git a/src/runtime/CL/functions/CLIntegralImage.cpp b/src/runtime/CL/functions/CLIntegralImage.cpp new file mode 100644 index 0000000000..2d54be32fa --- /dev/null +++ b/src/runtime/CL/functions/CLIntegralImage.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLIntegralImage.h" + +#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLIntegralImage::CLIntegralImage() + : _integral_hor(), _integral_vert() +{ +} + +void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output) +{ + _integral_hor.configure(input, output); + _integral_vert.configure(output); +} + +void CLIntegralImage::run() +{ + CLScheduler::get().enqueue(_integral_hor, false); + CLScheduler::get().enqueue(_integral_vert); +} diff --git a/src/runtime/CL/functions/CLLaplacianPyramid.cpp b/src/runtime/CL/functions/CLLaplacianPyramid.cpp new file mode 100644 index 0000000000..d7ce20653d --- /dev/null +++ b/src/runtime/CL/functions/CLLaplacianPyramid.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" +#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" + +using namespace arm_compute; + +CLLaplacianPyramid::CLLaplacianPyramid() + : _num_levels(0), _gaussian_pyr_function(), _convf(), _subf(), _depth_function(), _gauss_pyr(), _conv_pyr() +{ +} + +void CLLaplacianPyramid::configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON(0 == pyramid->info()->num_levels()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1)); + + _num_levels = pyramid->info()->num_levels(); + + // Create and initialize the gaussian pyramid and the convoluted pyramid + PyramidInfo pyramid_info; + pyramid_info.init(_num_levels, 0.5f, pyramid->info()->tensor_shape(), arm_compute::Format::U8); + + _gauss_pyr.init(pyramid_info); + _conv_pyr.init(pyramid_info); + + // Create Gaussian Pyramid function + _gaussian_pyr_function.configure(input, &_gauss_pyr, border_mode, constant_border_value); + + _convf = arm_compute::cpp14::make_unique(_num_levels); + _subf = arm_compute::cpp14::make_unique(_num_levels); + + for(unsigned int i = 0; i < _num_levels; ++i) + { + _convf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), border_mode, constant_border_value); + _subf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), pyramid->get_pyramid_level(i), ConvertPolicy::WRAP); + } + + _depth_function.configure(_conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0); + + _gauss_pyr.allocate(); + _conv_pyr.allocate(); +} + +void CLLaplacianPyramid::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(0 == _num_levels, "Unconfigured function"); + + _gaussian_pyr_function.run(); // compute gaussian pyramid + + for(unsigned int i = 0; i < _num_levels; ++i) + { + _convf[i].run(); // convolute gaussian pyramid + } + + for(unsigned int i = 0; i < _num_levels; ++i) + { + _subf[i].run(); // compute laplacian image + } + + _depth_function.run(); +} diff --git a/src/runtime/CL/functions/CLLaplacianReconstruct.cpp b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp new file mode 100644 index 0000000000..1dfab740d7 --- /dev/null +++ b/src/runtime/CL/functions/CLLaplacianReconstruct.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include + +using namespace arm_compute; + +CLLaplacianReconstruct::CLLaplacianReconstruct() + : _tmp_pyr(), _addf(), _scalef(), _depthf() +{ +} + +void CLLaplacianReconstruct::configure(const CLPyramid *pyramid, const ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON(input == output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(output->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(0)->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(0)->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1)); + + const size_t num_levels = pyramid->info()->num_levels(); + + // Create and initialize the tmp pyramid: I(n-2) = upsample( input + Laplace(n-1) ) + PyramidInfo pyramid_info; + pyramid_info.init(num_levels, 0.5f, output->info()->tensor_shape(), arm_compute::Format::S16); + _tmp_pyr.init(pyramid_info); + + // Allocate add and scale functions. Level 0 does not need to be scaled. 
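+    // Reconstruction recurrence implemented by the functions configured below, written in informal
+    // notation ("upsample" and "depth_convert" are shorthand here, not API calls; L(i) stands for
+    // pyramid->get_pyramid_level(i) and tmp(i) for _tmp_pyr.get_pyramid_level(i)):
+    //
+    //   tmp(last_level) = input + L(last_level)
+    //   tmp(i)          = upsample(tmp(i + 1)) + L(i),   i = last_level - 1, ..., 0
+    //   output          = depth_convert(tmp(0))          (S16 -> U8, saturating)
+    //
+    // One CLScale performs each upsample and one CLArithmeticAddition each sum, hence
+    // num_levels additions are needed but only num_levels - 1 scale functions.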
+ _addf = arm_compute::cpp14::make_unique(num_levels); + _scalef = arm_compute::cpp14::make_unique(num_levels - 1); + + const size_t last_level = num_levels - 1; + + _addf[last_level].configure(input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE); + + // Scale levels n-1 to 1, and add levels n-2 to 0 + for(size_t l = 0; l < last_level; ++l) + { + _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value); + _addf[l].configure(_tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE); + } + + // Convert level 0 from S16 to U8 + _depthf.configure(_tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0); + + _tmp_pyr.allocate(); +} + +void CLLaplacianReconstruct::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_addf == nullptr, "Unconfigured function"); + + const size_t last_level = _tmp_pyr.info()->num_levels() - 1; + + _addf[last_level].run(); + + // Run l = [last_level - 1, 0] + for(size_t l = last_level; l-- > 0;) + { + _scalef[l].run(); + _addf[l].run(); + } + + _depthf.run(); +} diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp new file mode 100644 index 0000000000..263fb51987 --- /dev/null +++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h" + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include +#include + +using namespace arm_compute; + +CLLocallyConnectedLayer::CLLocallyConnectedLayer() + : _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), _is_first_run(false) +{ +} + +void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 2); + } + + bool _has_bias = (biases != nullptr); + _is_first_run = true; + + // Get parameters for conv_info + unsigned int stride_x = 0; + unsigned int stride_y = 0; + unsigned int pad_x = 0; + unsigned int pad_y = 0; + std::tie(stride_x, stride_y) = conv_info.stride(); + std::tie(pad_x, pad_y) = conv_info.pad(); + + // Get convolved dimensions + unsigned int conv_w = 0; + unsigned int conv_h = 0; + std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), + stride_x, stride_y, pad_x, pad_y, conv_info.round()); + + ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); + ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one"); + + // Create tensor to store the reshaped weights + const size_t mat_weights_cols = weights->info()->dimension(3); + const size_t mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + ((_has_bias) ? 
1 : 0); + const size_t mat_weights_num = weights->info()->dimension(4); + + const TensorShape shape_wr(mat_weights_cols, mat_weights_rows, mat_weights_num); + + _weights_reshaped.allocator()->init(TensorInfo(shape_wr, 1, weights->info()->data_type())); + + // Create tensor to store im2col reshaped inputs + const size_t mat_input_cols = mat_weights_rows; + const size_t mat_input_rows = conv_w * conv_h; + TensorShape shape_im2col = input->info()->tensor_shape(); + shape_im2col.set(0, mat_input_cols); + shape_im2col.set(1, mat_input_rows); + shape_im2col.set(2, 1); + + _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, input->info()->data_type())); + + // Create locally connected layer output tensor + TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape(); + shape_gemm.set(0, mat_weights_cols); + shape_gemm.set(1, mat_input_rows); + _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); + + // Configure kernels + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); + _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); + _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); + + // Allocate intermediate tensors + _weights_reshaped.allocator()->allocate(); + _input_im2col_reshaped.allocator()->allocate(); + _gemm_output.allocator()->allocate(); +} + +void CLLocallyConnectedLayer::run() +{ + // Run weights reshaping (Runs once for every configure) + if(_is_first_run) + { + _is_first_run = false; + CLScheduler::get().enqueue(_weights_reshape_kernel); + } + + // Run input reshaping + CLScheduler::get().enqueue(_input_im2col_kernel); + + // Runs vector matrix multiply on reshaped matrices + CLScheduler::get().enqueue(_mm_kernel); + + // Reshape output matrix + CLScheduler::get().enqueue(_output_col2im_kernel, false); +} diff --git a/src/runtime/CL/functions/CLMagnitude.cpp b/src/runtime/CL/functions/CLMagnitude.cpp new file mode 100644 index 0000000000..51088cb71f --- /dev/null +++ b/src/runtime/CL/functions/CLMagnitude.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLMagnitude.h" + +#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLMagnitude::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, nullptr, mag_type); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLMeanStdDev.cpp b/src/runtime/CL/functions/CLMeanStdDev.cpp new file mode 100644 index 0000000000..56ba146790 --- /dev/null +++ b/src/runtime/CL/functions/CLMeanStdDev.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h" + +#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLMeanStdDev::CLMeanStdDev() + : _mean_stddev_kernel(), + _global_sum(), + _global_sum_squared() +{ +} + +void CLMeanStdDev::configure(const ICLImage *input, float *mean, float *stddev) +{ + _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); + + if(stddev != nullptr) + { + _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong)); + } + + _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); +} + +void CLMeanStdDev::run() +{ + CLScheduler::get().enqueue(_mean_stddev_kernel); +} diff --git a/src/runtime/CL/functions/CLMedian3x3.cpp b/src/runtime/CL/functions/CLMedian3x3.cpp new file mode 100644 index 0000000000..0c10f9aa08 --- /dev/null +++ b/src/runtime/CL/functions/CLMedian3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLMedian3x3.h" + +#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLMedian3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLMinMaxLocation.cpp b/src/runtime/CL/functions/CLMinMaxLocation.cpp new file mode 100644 index 0000000000..ad783d8a53 --- /dev/null +++ b/src/runtime/CL/functions/CLMinMaxLocation.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h" + +#include "arm_compute/core/CL/CLHelpers.h" + +using namespace arm_compute; + +CLMinMaxLocation::CLMinMaxLocation() + : _min_max_kernel(), + _min_max_loc_kernel(), + _min_max_vals(), + _min_max_count_vals(), + _min(nullptr), + _max(nullptr), + _min_count(nullptr), + _max_count(nullptr), + _min_loc(nullptr), + _max_loc(nullptr) +{ +} + +void CLMinMaxLocation::configure(const ICLImage *input, int32_t *min, int32_t *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count) +{ + ARM_COMPUTE_ERROR_ON(nullptr == min); + ARM_COMPUTE_ERROR_ON(nullptr == max); + + _min_max_vals = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 2 * sizeof(int32_t)); + _min_max_count_vals = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 2 * sizeof(uint32_t)); + _min = min; + _max = max; + _min_count = min_count; + _max_count = max_count; + _min_loc = min_loc; + _max_loc = max_loc; + + _min_max_kernel.configure(input, &_min_max_vals); + _min_max_loc_kernel.configure(input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc); +} + +void CLMinMaxLocation::run() +{ + cl::CommandQueue q = CLScheduler::get().queue(); + + CLScheduler::get().enqueue(_min_max_kernel, false); + CLScheduler::get().enqueue(_min_max_loc_kernel, false); + + // Update min and max + q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 0 * sizeof(int32_t), sizeof(int32_t), _min); + q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 1 * sizeof(int32_t), sizeof(int32_t), _max); + + // Update min and max count + if(_min_count != nullptr) + { + q.enqueueReadBuffer(_min_max_count_vals, CL_FALSE, 0 * sizeof(uint32_t), sizeof(uint32_t), _min_count); + } + if(_max_count != nullptr) + { + q.enqueueReadBuffer(_min_max_count_vals, CL_FALSE, 1 * sizeof(uint32_t), sizeof(uint32_t), _max_count); + } + + // Update min/max point arrays (Makes the kernel blocking) + if(_min_loc != nullptr) + { + unsigned int min_count = 0; + q.enqueueReadBuffer(_min_max_count_vals, CL_TRUE, 0 * sizeof(uint32_t), sizeof(uint32_t), &min_count); + size_t min_corner_size = std::min(static_cast(min_count), _min_loc->max_num_values()); + _min_loc->resize(min_corner_size); + } + if(_max_loc != nullptr) + { + unsigned int max_count = 0; + q.enqueueReadBuffer(_min_max_count_vals, CL_TRUE, 1 * sizeof(uint32_t), sizeof(uint32_t), &max_count); + size_t max_corner_size = std::min(static_cast(max_count), _max_loc->max_num_values()); + _max_loc->resize(max_corner_size); + } +} diff --git a/src/runtime/CL/functions/CLNonLinearFilter.cpp b/src/runtime/CL/functions/CLNonLinearFilter.cpp new file mode 100644 index 0000000000..b593a6cced --- /dev/null +++ b/src/runtime/CL/functions/CLNonLinearFilter.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h" + +#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLNonLinearFilter::configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp new file mode 100644 index 0000000000..ca7d5aede7 --- /dev/null +++ b/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" + +#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLNonMaximaSuppression3x3::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + + if(border_mode != BorderMode::UNDEFINED) + { + _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT); + } + else + { + _border_handler.configure(input, _kernel->border_size(), BorderMode::UNDEFINED); + } +} diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp new file mode 100644 index 0000000000..2d89ebd676 --- /dev/null +++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLNormalizationLayer::CLNormalizationLayer() + : _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() +{ +} + +void CLNormalizationLayer::configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info) +{ + ARM_COMPUTE_ERROR_ON(input == nullptr); + + _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type())); + + _norm_kernel.configure(input, &_squared_input, output, norm_info); + _multiply_kernel.configure(input, input, &_squared_input, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN); + // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel + _border_handler.configure(&_squared_input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); + + // Allocate intermediate buffers + _squared_input.allocator()->allocate(); +} + +void CLNormalizationLayer::run() +{ + CLScheduler::get().enqueue(_multiply_kernel, false); + CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(_norm_kernel, false); +} diff --git a/src/runtime/CL/functions/CLOpticalFlow.cpp b/src/runtime/CL/functions/CLOpticalFlow.cpp new file mode 100644 index 0000000000..a6b0eb3bec --- /dev/null +++ b/src/runtime/CL/functions/CLOpticalFlow.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/CL/CLPyramid.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" + +using namespace arm_compute; + +CLOpticalFlow::CLOpticalFlow() + : _tracker_init_kernel(), _tracker_stage0_kernel(), _tracker_stage1_kernel(), _tracker_finalize_kernel(), _func_scharr(), _scharr_gx(), _scharr_gy(), _old_points(nullptr), + _new_points_estimates(nullptr), _new_points(nullptr), _old_points_internal(), _new_points_internal(), _coefficient_table(), _old_values(), _num_levels(0) +{ +} + +void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid, + const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points, + Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate, + BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == old_pyramid); + ARM_COMPUTE_ERROR_ON(nullptr == new_pyramid); + ARM_COMPUTE_ERROR_ON(nullptr == old_points); + ARM_COMPUTE_ERROR_ON(nullptr == new_points_estimates); + ARM_COMPUTE_ERROR_ON(nullptr == new_points); + ARM_COMPUTE_ERROR_ON(old_pyramid->info()->num_levels() != new_pyramid->info()->num_levels()); + ARM_COMPUTE_ERROR_ON(0 == old_pyramid->info()->num_levels()); + ARM_COMPUTE_ERROR_ON(old_pyramid->info()->width() != new_pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(old_pyramid->info()->height() != new_pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(use_initial_estimate && old_points->num_values() != new_points_estimates->num_values()); + + // Set member variables + _old_points = old_points; + _new_points_estimates = new_points_estimates; + _new_points = new_points; + _num_levels = old_pyramid->info()->num_levels(); + + const float pyr_scale = old_pyramid->info()->scale(); + const int list_length = old_points->num_values(); + const int old_values_list_length = list_length * window_dimension * window_dimension; + + // Create kernels and tensors + _tracker_init_kernel = arm_compute::cpp14::make_unique(_num_levels); + _tracker_stage0_kernel = arm_compute::cpp14::make_unique(_num_levels); + _tracker_stage1_kernel = arm_compute::cpp14::make_unique(_num_levels); + _func_scharr = arm_compute::cpp14::make_unique(_num_levels); + _scharr_gx = arm_compute::cpp14::make_unique(_num_levels); + _scharr_gy = arm_compute::cpp14::make_unique(_num_levels); + + // Create internal keypoint arrays + _old_points_internal = arm_compute::cpp14::make_unique(list_length); + _old_points_internal->resize(list_length); + _new_points_internal = arm_compute::cpp14::make_unique(list_length); + _new_points_internal->resize(list_length); + _coefficient_table = arm_compute::cpp14::make_unique(list_length); + _coefficient_table->resize(list_length); + _old_values = arm_compute::cpp14::make_unique(old_values_list_length); + _old_values->resize(old_values_list_length); + _new_points->resize(list_length); + + for(size_t i = 0; i < _num_levels; ++i) + { + // Get images from the ith level of old and right pyramid + ICLImage *old_ith_input = 
old_pyramid->get_pyramid_level(i); + ICLImage *new_ith_input = new_pyramid->get_pyramid_level(i); + + // Get width and height of images + const unsigned int width_ith = old_ith_input->info()->dimension(0); + const unsigned int height_ith = new_ith_input->info()->dimension(1); + + // Initialize Scharr tensors + TensorInfo tensor_info(TensorShape(width_ith, height_ith), 1, DataType::S16); + _scharr_gx[i].allocator()->init(tensor_info); + _scharr_gy[i].allocator()->init(tensor_info); + + // Init Scharr kernel + _func_scharr[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value); + + // Init Lucas-Kanade init kernel + _tracker_init_kernel[i].configure(old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale); + + // Init Lucas-Kanade stage0 kernel + _tracker_stage0_kernel[i].configure(old_ith_input, &_scharr_gx[i], &_scharr_gy[i], + _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(), + window_dimension, i); + + // Init Lucas-Kanade stage1 kernel + _tracker_stage1_kernel[i].configure(new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(), + termination, epsilon, num_iterations, window_dimension, i); + + // Allocate intermediate buffers + _scharr_gx[i].allocator()->allocate(); + _scharr_gy[i].allocator()->allocate(); + } + + // Finalize Lucas-Kanade + _tracker_finalize_kernel.configure(_new_points_internal.get(), new_points); +} + +void CLOpticalFlow::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_num_levels == 0, "Unconfigured function"); + + for(unsigned int level = _num_levels; level > 0; --level) + { + // Run Scharr kernel + _func_scharr[level - 1].run(); + + // Run Lucas-Kanade init kernel + CLScheduler::get().enqueue(_tracker_init_kernel[level - 1]); + + // Run Lucas-Kanade stage0 kernel + CLScheduler::get().enqueue(_tracker_stage0_kernel[level - 1]); + + // Run Lucas-Kanade stage1 kernel + CLScheduler::get().enqueue(_tracker_stage1_kernel[level - 1]); + } + + CLScheduler::get().enqueue(_tracker_finalize_kernel, true); +} diff --git a/src/runtime/CL/functions/CLPhase.cpp b/src/runtime/CL/functions/CLPhase.cpp new file mode 100644 index 0000000000..a8cb22b06e --- /dev/null +++ b/src/runtime/CL/functions/CLPhase.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLPhase.h" + +#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLPhase::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, nullptr, output, MagnitudeType::L1NORM, phase_type); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp new file mode 100644 index 0000000000..8a86c2e203 --- /dev/null +++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" + +#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLPixelWiseMultiplication::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, + ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, scale, overflow_policy, rounding_policy); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp new file mode 100644 index 0000000000..1ef70f4a2b --- /dev/null +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" + +#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" +#include "arm_compute/core/Helpers.h" + +using namespace arm_compute; + +void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info) +{ + // Configure pooling kernel + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, pool_info); + _kernel = std::move(k); + + // Configure border depending on operation required + BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? BorderMode::REPLICATE : BorderMode::CONSTANT; + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0)); +} diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp new file mode 100644 index 0000000000..f6b1713c58 --- /dev/null +++ b/src/runtime/CL/functions/CLRemap.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
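CLRemap, implemented next, reads its sampling coordinates from two F32 maps with the same shape as the output: for every output pixel, map_x and map_y hold the x and y position to fetch from the input, and AREA interpolation is rejected by the ERROR_ON_MSG check in configure(). A hedged configuration sketch, reusing the headers and CL initialisation of the phase_example() sketch above, with illustrative sizes:

#include "arm_compute/runtime/CL/functions/CLRemap.h"

using namespace arm_compute;

void remap_example()
{
    const TensorShape shape(640, 480);
    CLTensor src, dst, map_x, map_y;
    src.allocator()->init(TensorInfo(shape, 1, DataType::U8));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::U8));
    map_x.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    map_y.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    CLRemap remap;
    remap.configure(&src, &map_x, &map_y, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::CONSTANT, 0);
    // ... allocate the tensors, fill src and the two maps, then remap.run() ...
}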
+ */ +#include "arm_compute/runtime/CL/functions/CLRemap.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLRemapKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include + +using namespace arm_compute; + +void CLRemap::configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported"); + + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLScale.cpp b/src/runtime/CL/functions/CLScale.cpp new file mode 100644 index 0000000000..043f873028 --- /dev/null +++ b/src/runtime/CL/functions/CLScale.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/CL/functions/CLScale.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLScaleKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" + +using namespace arm_compute; + +void CLScale::configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(output == input); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, policy, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value); +} diff --git a/src/runtime/CL/functions/CLScharr3x3.cpp b/src/runtime/CL/functions/CLScharr3x3.cpp new file mode 100644 index 0000000000..c8bc465be6 --- /dev/null +++ b/src/runtime/CL/functions/CLScharr3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLScharr3x3.h" + +#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLScharr3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLSobel3x3.cpp b/src/runtime/CL/functions/CLSobel3x3.cpp new file mode 100644 index 0000000000..6b74ebaedb --- /dev/null +++ b/src/runtime/CL/functions/CLSobel3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLSobel3x3.h" + +#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLSobel3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLSobel5x5.cpp b/src/runtime/CL/functions/CLSobel5x5.cpp new file mode 100644 index 0000000000..098b546c1a --- /dev/null +++ b/src/runtime/CL/functions/CLSobel5x5.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
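CLSobel5x5, implemented next, runs the 5x5 filter separably: a horizontal pass writes S16 intermediates (_tmp_x/_tmp_y) and a vertical pass produces the final gradients, with the intermediates only initialised and allocated for the outputs actually requested. Passing nullptr for output_x or output_y therefore skips that direction entirely, as in this sketch (same assumptions as the sketches above, sizes illustrative):

#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"

using namespace arm_compute;

void sobel5x5_example()
{
    CLTensor src, gx;
    src.allocator()->init(TensorInfo(TensorShape(640, 480), 1, DataType::U8));
    gx.allocator()->init(TensorInfo(TensorShape(640, 480), 1, DataType::S16));

    CLSobel5x5 sobel;
    sobel.configure(&src, &gx, nullptr, BorderMode::UNDEFINED); // y gradient skipped, _tmp_y never allocated
    // ... allocate, fill src, then sobel.run() ...
}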
+ */ +#include "arm_compute/runtime/CL/functions/CLSobel5x5.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +using namespace arm_compute; + +CLSobel5x5::CLSobel5x5() + : _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() +{ +} + +void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + + const bool run_sobel_x = output_x != nullptr; + const bool run_sobel_y = output_y != nullptr; + + TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S16); + + if(run_sobel_x && run_sobel_y) + { + _tmp_x.allocator()->init(tensor_info); + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + _tmp_y.allocator()->allocate(); + } + else if(run_sobel_x) + { + _tmp_x.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + } + else if(run_sobel_y) + { + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_y.allocator()->allocate(); + } + _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); +} + +void CLSobel5x5::run() +{ + CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(_sobel_hor, false); + CLScheduler::get().enqueue(_sobel_vert); +} diff --git a/src/runtime/CL/functions/CLSobel7x7.cpp b/src/runtime/CL/functions/CLSobel7x7.cpp new file mode 100644 index 0000000000..db84fa99ae --- /dev/null +++ b/src/runtime/CL/functions/CLSobel7x7.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLSobel7x7.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +using namespace arm_compute; + +CLSobel7x7::CLSobel7x7() + : _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y() +{ +} + +void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + + const bool run_sobel_x = output_x != nullptr; + const bool run_sobel_y = output_y != nullptr; + + TensorInfo tensor_info(input->info()->tensor_shape(), 1, DataType::S32); + + if(run_sobel_x && run_sobel_y) + { + _tmp_x.allocator()->init(tensor_info); + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + _tmp_y.allocator()->allocate(); + } + else if(run_sobel_x) + { + _tmp_x.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + } + else if(run_sobel_y) + { + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_y.allocator()->allocate(); + } + _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); +} + +void CLSobel7x7::run() +{ + CLScheduler::get().enqueue(_border_handler, false); + CLScheduler::get().enqueue(_sobel_hor, false); + CLScheduler::get().enqueue(_sobel_vert); +} diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp new file mode 100644 index 0000000000..2a78c58053 --- /dev/null +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" + +#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLSoftmaxLayer::CLSoftmaxLayer() + : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() +{ +} + +void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); + + // Create intermediate tensors shapes + _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type())); + + TensorShape shape = input->info()->tensor_shape(); + shape.set(0, 1); + TensorInfo tensor_info_max_sum(shape, input->info()->num_channels(), input->info()->data_type()); + _max.allocator()->init(tensor_info_max_sum); + _sum.allocator()->init(tensor_info_max_sum); + + // Configure Kernels + _max_kernel.configure(input, &_max); + _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); + _norm_kernel.configure(&_tmp, &_sum, output); + + // Allocate intermediate buffers + _tmp.allocator()->allocate(); + _max.allocator()->allocate(); + _sum.allocator()->allocate(); +} + +void CLSoftmaxLayer::run() +{ + CLScheduler::get().enqueue(_max_kernel, false); + CLScheduler::get().enqueue(_shift_exp_sum_kernel, false); + CLScheduler::get().enqueue(_norm_kernel); +} diff --git a/src/runtime/CL/functions/CLTableLookup.cpp b/src/runtime/CL/functions/CLTableLookup.cpp new file mode 100644 index 0000000000..743ed5e73e --- /dev/null +++ b/src/runtime/CL/functions/CLTableLookup.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
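The CLSoftmaxLayer::configure above splits softmax into three kernels: a row-wise maximum (_max_kernel), shifted exponentials plus their running sum (_shift_exp_sum_kernel), and a final normalisation (_norm_kernel); subtracting the maximum first keeps the exponentials from overflowing. A plain C++ reference of the same three stages for a single row of logits, as an illustration only, not the OpenCL implementation:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> softmax_reference(const std::vector<float> &x)
{
    if(x.empty())
    {
        return {};
    }

    // Stage 1 (_max_kernel): row-wise maximum, for numerical stability.
    const float max_val = *std::max_element(x.begin(), x.end());

    // Stage 2 (_shift_exp_sum_kernel): shifted exponentials and their sum.
    std::vector<float> y(x.size());
    float sum = 0.f;
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        y[i] = std::exp(x[i] - max_val);
        sum += y[i];
    }

    // Stage 3 (_norm_kernel): divide every element by the sum.
    for(float &v : y)
    {
        v /= sum;
    }
    return y;
}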
+ */ +#include "arm_compute/runtime/CL/functions/CLTableLookup.h" + +#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLTableLookup::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, lut, output); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLThreshold.cpp b/src/runtime/CL/functions/CLThreshold.cpp new file mode 100644 index 0000000000..e70f932d66 --- /dev/null +++ b/src/runtime/CL/functions/CLThreshold.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLThreshold.h" + +#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLThreshold::configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, threshold, false_value, true_value, type, upper); + _kernel = std::move(k); +} diff --git a/src/runtime/CL/functions/CLTranspose.cpp b/src/runtime/CL/functions/CLTranspose.cpp new file mode 100644 index 0000000000..d802b4fe77 --- /dev/null +++ b/src/runtime/CL/functions/CLTranspose.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLTranspose.h" + +#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" +#include "arm_compute/core/Helpers.h" + +#include + +using namespace arm_compute; + +void CLTranspose::configure(const ICLTensor *input, ICLTensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} \ No newline at end of file diff --git a/src/runtime/CL/functions/CLWarpAffine.cpp b/src/runtime/CL/functions/CLWarpAffine.cpp new file mode 100644 index 0000000000..537e0d9397 --- /dev/null +++ b/src/runtime/CL/functions/CLWarpAffine.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLWarpAffine.h" + +#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLWarpAffine::configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, matrix, policy); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CL/functions/CLWarpPerspective.cpp b/src/runtime/CL/functions/CLWarpPerspective.cpp new file mode 100644 index 0000000000..a552ab480d --- /dev/null +++ b/src/runtime/CL/functions/CLWarpPerspective.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h" + +#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void CLWarpPerspective::configure(ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, matrix, policy); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp new file mode 100644 index 0000000000..886933074d --- /dev/null +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
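The CPPScheduler that follows keeps a pool of worker threads parked on POSIX semaphores; schedule() splits a kernel's maximum window into sub-windows along one dimension, hands one to each worker and runs the last chunk on the calling thread, capping the split at the number of iterations available. A usage sketch; that NEON functions reach this scheduler through NEScheduler is the default build behaviour and is assumed here, and the wrapper name and sizes are illustrative:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void scheduler_example()
{
    CPPScheduler::get().set_num_threads(4); // 0 restores std::thread::hardware_concurrency()

    Tensor a, b, out;
    const TensorInfo info(TensorShape(1920, 1080), 1, DataType::U8);
    a.allocator()->init(info);
    b.allocator()->init(info);
    out.allocator()->init(info);
    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();

    NEAbsoluteDifference absdiff;
    absdiff.configure(&a, &b, &out);
    // run() calls NEScheduler::get().schedule(kernel, Window::DimY), so the 1080
    // rows are split into at most 4 sub-windows processed in parallel.
    absdiff.run();
}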
+ */ +#include "arm_compute/runtime/CPP/CPPScheduler.h" + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" + +#include +#include +#include +#include + +using namespace arm_compute; + +class arm_compute::Thread +{ +public: + /** Start a new thread + */ + Thread(); + Thread(const Thread &) = delete; + Thread &operator=(const Thread &) = delete; + Thread(Thread &&) = delete; + Thread &operator=(Thread &&) = delete; + /** Make the thread join + */ + ~Thread(); + /** Request the worker thread to start executing the given kernel + * This function will return as soon as the kernel has been sent to the worker thread. + * wait() needs to be called to ensure the execution is complete. + */ + void start(ICPPKernel *kernel, const Window &window); + /** Wait for the current kernel execution to complete + */ + void wait(); + /** Function ran by the worker thread + */ + void worker_thread(); + +private: + std::thread _thread; + ICPPKernel *_kernel{ nullptr }; + Window _window; + sem_t _wait_for_work; + sem_t _job_complete; + std::exception_ptr _current_exception; +}; + +Thread::Thread() + : _thread(), _window(), _wait_for_work(), _job_complete(), _current_exception(nullptr) +{ + int ret = sem_init(&_wait_for_work, 0, 0); + ARM_COMPUTE_ERROR_ON(ret < 0); + ARM_COMPUTE_UNUSED(ret); + + ret = sem_init(&_job_complete, 0, 0); + ARM_COMPUTE_ERROR_ON(ret < 0); + ARM_COMPUTE_UNUSED(ret); + + _thread = std::thread(&Thread::worker_thread, this); +} + +Thread::~Thread() +{ + ARM_COMPUTE_ERROR_ON(!_thread.joinable()); + + start(nullptr, Window()); + _thread.join(); + + int ret = sem_destroy(&_wait_for_work); + ARM_COMPUTE_ERROR_ON(ret < 0); + ARM_COMPUTE_UNUSED(ret); + + ret = sem_destroy(&_job_complete); + ARM_COMPUTE_ERROR_ON(ret < 0); + ARM_COMPUTE_UNUSED(ret); +} + +void Thread::start(ICPPKernel *kernel, const Window &window) +{ + _kernel = kernel; + _window = window; + int ret = sem_post(&_wait_for_work); + ARM_COMPUTE_UNUSED(ret); + ARM_COMPUTE_ERROR_ON(ret < 0); +} + +void Thread::wait() +{ + int ret = sem_wait(&_job_complete); + ARM_COMPUTE_UNUSED(ret); + ARM_COMPUTE_ERROR_ON(ret < 0); + if(_current_exception) + { + std::rethrow_exception(_current_exception); + } +} + +void Thread::worker_thread() +{ + while(sem_wait(&_wait_for_work) >= 0) + { + _current_exception = nullptr; + // Time to exit + if(_kernel == nullptr) + { + return; + } + + try + { + _window.validate(); + _kernel->run(_window); + } + catch(...) + { + _current_exception = std::current_exception(); + } + int ret = sem_post(&_job_complete); + ARM_COMPUTE_UNUSED(ret); + ARM_COMPUTE_ERROR_ON(ret < 0); + } + + ARM_COMPUTE_ERROR("Wait failed"); +} + +namespace +{ +void delete_threads(Thread *t) +{ + delete[] t; +} +} // namespace + +CPPScheduler &CPPScheduler::get() +{ + static CPPScheduler scheduler; + return scheduler; +} + +unsigned int CPPScheduler::num_threads() const +{ + return _num_threads; +} + +CPPScheduler::CPPScheduler() + : _num_threads(std::thread::hardware_concurrency()), + _threads(std::unique_ptr(new Thread[std::thread::hardware_concurrency() - 1], delete_threads)) +{ +} + +void CPPScheduler::set_num_threads(unsigned int num_threads) +{ + const unsigned int num_cores = std::thread::hardware_concurrency(); + _num_threads = num_threads == 0 ? 
num_cores : num_threads; +} + +void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +{ + ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); + + /** [Scheduler example] */ + const Window &max_window = kernel->window(); + const unsigned int num_iterations = max_window.num_iterations(split_dimension); + const unsigned int num_threads = std::min(num_iterations, _num_threads); + + if(!kernel->is_parallelisable() || 1 == num_threads) + { + kernel->run(max_window); + } + else + { + for(unsigned int t = 0; t < num_threads; ++t) + { + Window win = max_window.split_window(split_dimension, t, num_threads); + win.set_thread_id(t); + win.set_num_threads(num_threads); + + if(t != num_threads - 1) + { + _threads[t].start(kernel, win); + } + else + { + kernel->run(win); + } + } + + try + { + for(unsigned int t = 1; t < num_threads; ++t) + { + _threads[t - 1].wait(); + } + } + catch(const std::system_error &e) + { + std::cout << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n'; + } + } + /** [Scheduler example] */ +} diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp new file mode 100644 index 0000000000..f086813e91 --- /dev/null +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/SingleThreadScheduler.h" + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Utils.h" + +using namespace arm_compute; + +SingleThreadScheduler &SingleThreadScheduler::get() +{ + static SingleThreadScheduler scheduler; + return scheduler; +} + +void SingleThreadScheduler::set_num_threads(unsigned int num_threads) +{ + ARM_COMPUTE_UNUSED(num_threads); +} + +void SingleThreadScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +{ + ARM_COMPUTE_UNUSED(split_dimension); + kernel->run(kernel->window()); +} + +unsigned int SingleThreadScheduler::num_threads() const +{ + return 1; +} diff --git a/src/runtime/Distribution1D.cpp b/src/runtime/Distribution1D.cpp new file mode 100644 index 0000000000..b06767499b --- /dev/null +++ b/src/runtime/Distribution1D.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/Distribution1D.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" + +#include <cstdint> + +using namespace arm_compute; + +Distribution1D::Distribution1D(size_t num_bins, int32_t offset, uint32_t range) + : IDistribution1D(num_bins, offset, range), _data(arm_compute::cpp14::make_unique<uint32_t[]>(num_bins)) +{ +} + +uint32_t *Distribution1D::buffer() const +{ + ARM_COMPUTE_ERROR_ON(nullptr == _data); + return _data.get(); +} diff --git a/src/runtime/HOG.cpp b/src/runtime/HOG.cpp new file mode 100644 index 0000000000..5d533dded4 --- /dev/null +++ b/src/runtime/HOG.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
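The Distribution1D above is a thin owner of a uint32_t bin array on top of IDistribution1D; histogram functions (e.g. NEHistogram/CLHistogram, assumed here) fill it during their run and the raw counts are read back through buffer(). A small sketch with the usual 256-bin U8 layout:

#include "arm_compute/runtime/Distribution1D.h"

#include <cstdint>

using namespace arm_compute;

// 256 bins, first bin starting at value 0, covering a range of 256 values.
Distribution1D hist(256, 0, 256);
// ... pass &hist to the histogram function's configure(), run it, then:
const uint32_t *bins = hist.buffer(); // per-bin counts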
+ */ +#include "arm_compute/runtime/HOG.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" + +using namespace arm_compute; + +HOG::HOG() + : IHOG(), _info(), _descriptor(nullptr) +{ +} + +void HOG::init(const HOGInfo &input) +{ + ARM_COMPUTE_ERROR_ON(nullptr != _descriptor); + _info = input; + _descriptor = arm_compute::cpp14::make_unique(_info.descriptor_size()); +} + +float *HOG::descriptor() const +{ + return _descriptor.get(); +} + +const HOGInfo *HOG::info() const +{ + return &_info; +} diff --git a/src/runtime/ILutAllocator.cpp b/src/runtime/ILutAllocator.cpp new file mode 100644 index 0000000000..fb961638f1 --- /dev/null +++ b/src/runtime/ILutAllocator.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/ILutAllocator.h" + +#include "arm_compute/core/Utils.h" + +using namespace arm_compute; + +ILutAllocator::ILutAllocator() + : _num_elements(0), _data_type(DataType::U8) +{ +} + +void ILutAllocator::init(size_t num_elements, DataType data_type) +{ + // Init internal metadata + _num_elements = num_elements; + _data_type = data_type; + + // Allocate the image's memory + allocate(); +} + +size_t ILutAllocator::num_elements() const +{ + return _num_elements; +} + +DataType ILutAllocator::type() const +{ + return _data_type; +} + +size_t ILutAllocator::size() const +{ + return data_size_from_type(_data_type) * num_elements(); +} diff --git a/src/runtime/ITensorAllocator.cpp b/src/runtime/ITensorAllocator.cpp new file mode 100644 index 0000000000..8294201384 --- /dev/null +++ b/src/runtime/ITensorAllocator.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/ITensorAllocator.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" + +using namespace arm_compute; + +ITensorAllocator::ITensorAllocator() + : _info() +{ +} + +void ITensorAllocator::init(const TensorInfo &input) +{ + _info = input; +} + +TensorInfo &ITensorAllocator::info() +{ + return _info; +} + +const TensorInfo &ITensorAllocator::info() const +{ + return _info; +} diff --git a/src/runtime/Lut.cpp b/src/runtime/Lut.cpp new file mode 100644 index 0000000000..1b3daf1f60 --- /dev/null +++ b/src/runtime/Lut.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/Lut.h" + +#include + +using namespace arm_compute; + +Lut::Lut() + : _allocator() +{ +} + +Lut::Lut(size_t num_elements, DataType data_type) + : _allocator() +{ + _allocator.init(num_elements, data_type); +} + +size_t Lut::num_elements() const +{ + return _allocator.num_elements(); +} + +uint32_t Lut::index_offset() const +{ + return (DataType::S16 == _allocator.type()) ? num_elements() / 2 : 0; +} + +size_t Lut::size_in_bytes() const +{ + return _allocator.size(); +} + +DataType Lut::type() const +{ + return _allocator.type(); +} + +uint8_t *Lut::buffer() const +{ + return _allocator.data(); +} + +void Lut::clear() +{ + ARM_COMPUTE_ERROR_ON(this->buffer() == nullptr); + std::memset(this->buffer(), 0, this->size_in_bytes()); +} + +ILutAllocator *Lut::allocator() +{ + return &_allocator; +} diff --git a/src/runtime/LutAllocator.cpp b/src/runtime/LutAllocator.cpp new file mode 100644 index 0000000000..17baf21f45 --- /dev/null +++ b/src/runtime/LutAllocator.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/LutAllocator.h" + +#include "arm_compute/core/Helpers.h" + +using namespace arm_compute; + +LutAllocator::LutAllocator() + : _buffer(nullptr) +{ +} + +uint8_t *LutAllocator::data() const +{ + return _buffer.get(); +} + +void LutAllocator::allocate() +{ + _buffer = arm_compute::cpp14::make_unique(size()); +} + +uint8_t *LutAllocator::lock() +{ + return _buffer.get(); +} + +void LutAllocator::unlock() +{ +} diff --git a/src/runtime/MultiHOG.cpp b/src/runtime/MultiHOG.cpp new file mode 100644 index 0000000000..003dc93895 --- /dev/null +++ b/src/runtime/MultiHOG.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
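The Lut/LutAllocator pair above allocates its backing store when ILutAllocator::init() runs from the Lut constructor, so no separate allocate() call is needed, and index_offset() reports the half-table offset used for signed S16 lookups. A short sketch building a U8 inversion table, e.g. for the NEON table-lookup function:

#include "arm_compute/runtime/Lut.h"

#include <cstddef>
#include <cstdint>

using namespace arm_compute;

void build_inversion_lut()
{
    Lut lut(256, DataType::U8); // storage is allocated by the constructor
    lut.clear();                // zero the table

    uint8_t *table = lut.buffer();
    for(std::size_t i = 0; i < lut.num_elements(); ++i)
    {
        table[i] = static_cast<uint8_t>(255 - i);
    }
    // ... pass &lut to e.g. NETableLookup::configure() ...
}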
+ */ +#include "arm_compute/runtime/MultiHOG.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IMultiHOG.h" + +using namespace arm_compute; + +MultiHOG::MultiHOG(size_t num_models) + : _num_models(num_models), _model(arm_compute::cpp14::make_unique(_num_models)) +{ +} + +size_t MultiHOG::num_models() const +{ + return _num_models; +} + +IHOG *MultiHOG::model(size_t index) +{ + ARM_COMPUTE_ERROR_ON(index >= _num_models); + return (_model.get() + index); +} + +const IHOG *MultiHOG::model(size_t index) const +{ + ARM_COMPUTE_ERROR_ON(index >= _num_models); + return (_model.get() + index); +} diff --git a/src/runtime/MultiImage.cpp b/src/runtime/MultiImage.cpp new file mode 100644 index 0000000000..def1487c5e --- /dev/null +++ b/src/runtime/MultiImage.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/MultiImage.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +MultiImage::MultiImage() + : _info(), _plane() +{ +} + +const MultiImageInfo *MultiImage::info() const +{ + return &_info; +} + +void MultiImage::init(unsigned int width, unsigned int height, Format format) +{ + internal_init(width, height, format, false); +} + +void MultiImage::init_auto_padding(unsigned int width, unsigned int height, Format format) +{ + internal_init(width, height, format, true); +} + +void MultiImage::internal_init(unsigned int width, unsigned int height, Format format, bool auto_padding) +{ + TensorInfo info(width, height, Format::U8); + + if(auto_padding) + { + info.auto_padding(); + } + + switch(format) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::F16: + case Format::F32: + case Format::U32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + { + TensorInfo info_full(width, height, format); + + if(auto_padding) + { + info_full.auto_padding(); + } + + std::get<0>(_plane).allocator()->init(info_full); + break; + } + case Format::NV12: + case Format::NV21: + { + TensorInfo info_uv88(width / 2, height / 2, Format::UV88); + + if(auto_padding) + { + info_uv88.auto_padding(); + } + + std::get<0>(_plane).allocator()->init(info); + std::get<1>(_plane).allocator()->init(info_uv88); + break; + } + case Format::IYUV: + { + TensorInfo info_sub2(width / 2, height / 2, Format::U8); + + if(auto_padding) + { + info_sub2.auto_padding(); + } + + std::get<0>(_plane).allocator()->init(info); + std::get<1>(_plane).allocator()->init(info_sub2); + std::get<2>(_plane).allocator()->init(info_sub2); + break; + } + case Format::YUV444: + std::get<0>(_plane).allocator()->init(info); + std::get<1>(_plane).allocator()->init(info); + std::get<2>(_plane).allocator()->init(info); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } + + _info.init(width, height, format); +} + +void MultiImage::allocate() +{ + switch(_info.format()) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::F16: + case Format::F32: + case Format::U32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + std::get<0>(_plane).allocator()->allocate(); + break; + case Format::NV12: + case Format::NV21: + std::get<0>(_plane).allocator()->allocate(); + std::get<1>(_plane).allocator()->allocate(); + break; + case Format::IYUV: + case Format::YUV444: + std::get<0>(_plane).allocator()->allocate(); + std::get<1>(_plane).allocator()->allocate(); + std::get<2>(_plane).allocator()->allocate(); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } +} + +void MultiImage::create_subimage(MultiImage *image, const Coordinates &coords, unsigned int width, unsigned int height) +{ + arm_compute::Format format = image->info()->format(); + const TensorInfo info(width, height, Format::U8); + + switch(format) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::F32: + case Format::F16: + case Format::U32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + { + const TensorInfo info_full(width, height, format); + std::get<0>(_plane).allocator()->init(*dynamic_cast(image->plane(0))->allocator(), coords, info_full); + break; + } + 
case Format::NV12: + case Format::NV21: + { + const TensorInfo info_uv88(width / 2, height / 2, Format::UV88); + std::get<0>(_plane).allocator()->init(*dynamic_cast(image->plane(0))->allocator(), coords, info); + std::get<1>(_plane).allocator()->init(*dynamic_cast(image->plane(1))->allocator(), coords, info_uv88); + break; + } + case Format::IYUV: + { + const TensorInfo info_sub2(width / 2, height / 2, Format::U8); + std::get<0>(_plane).allocator()->init(*dynamic_cast(image->plane(0))->allocator(), coords, info); + std::get<1>(_plane).allocator()->init(*dynamic_cast(image->plane(1))->allocator(), coords, info_sub2); + std::get<2>(_plane).allocator()->init(*dynamic_cast(image->plane(2))->allocator(), coords, info_sub2); + break; + } + case Format::YUV444: + std::get<0>(_plane).allocator()->init(*dynamic_cast(image->plane(0))->allocator(), coords, info); + std::get<1>(_plane).allocator()->init(*dynamic_cast(image->plane(0))->allocator(), coords, info); + std::get<2>(_plane).allocator()->init(*dynamic_cast(image->plane(0))->allocator(), coords, info); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } + + _info.init(width, height, format); +} + +Image *MultiImage::plane(unsigned int index) +{ + return &_plane[index]; +} + +const Image *MultiImage::plane(unsigned int index) const +{ + return &_plane[index]; +} diff --git a/src/runtime/NEON/INESimpleFunction.cpp b/src/runtime/NEON/INESimpleFunction.cpp new file mode 100644 index 0000000000..6f0da85fc8 --- /dev/null +++ b/src/runtime/NEON/INESimpleFunction.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +INESimpleFunction::INESimpleFunction() + : _kernel(), _border_handler() +{ +} + +void INESimpleFunction::run() +{ + _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(_kernel.get(), Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp new file mode 100644 index 0000000000..b39feb3a2b --- /dev/null +++ b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" + +#include + +using namespace arm_compute; + +void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEAccumulate.cpp b/src/runtime/NEON/functions/NEAccumulate.cpp new file mode 100644 index 0000000000..c39abfc540 --- /dev/null +++ b/src/runtime/NEON/functions/NEAccumulate.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
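A caller-side sketch for one of these wrappers, assuming two pre-filled U8 images of the same size (names and dimensions are illustrative):

    #include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    Tensor in1, in2, out;
    in1.allocator()->init(TensorInfo(640U, 480U, Format::U8));
    in2.allocator()->init(TensorInfo(640U, 480U, Format::U8));
    out.allocator()->init(TensorInfo(640U, 480U, Format::U8));

    NEAbsoluteDifference absdiff;
    absdiff.configure(&in1, &in2, &out); // configure once
    in1.allocator()->allocate();
    in2.allocator()->allocate();
    out.allocator()->allocate();
    // ... fill in1 and in2 ...
    absdiff.run();                       // run() inherited from INESimpleFunction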
+ */ +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" + +#include + +using namespace arm_compute; + +void NEAccumulate::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16) +{ + if(use_fp16) + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, alpha, output); + _kernel = std::move(k); + } + else + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, alpha, output); + _kernel = std::move(k); + } +} + +void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, shift, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp new file mode 100644 index 0000000000..f5d81d7cd8 --- /dev/null +++ b/src/runtime/NEON/functions/NEActivationLayer.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" + +using namespace arm_compute; + +void NEActivationLayer::configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, activation_info); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp new file mode 100644 index 0000000000..50cc38b489 --- /dev/null +++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
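A short usage sketch for the activation wrapper, assuming src and dst are already initialised F32 tensors of the same shape:

    NEActivationLayer act;
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    act.run();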
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" + +#include + +using namespace arm_compute; + +void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, policy); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp new file mode 100644 index 0000000000..a3d27c0ed6 --- /dev/null +++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" + +#include + +using namespace arm_compute; + +void NEArithmeticSubtraction::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, policy); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp new file mode 100644 index 0000000000..a24429c6de --- /dev/null +++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEBatchNormalizationLayer::NEBatchNormalizationLayer() + : _norm_kernel() +{ +} + +void NEBatchNormalizationLayer::configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon) +{ + // Configure kernel + _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon); +} + +void NEBatchNormalizationLayer::run() +{ + NEScheduler::get().schedule(&_norm_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEBitwiseAnd.cpp b/src/runtime/NEON/functions/NEBitwiseAnd.cpp new file mode 100644 index 0000000000..5aafc51dc0 --- /dev/null +++ b/src/runtime/NEON/functions/NEBitwiseAnd.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" + +#include + +using namespace arm_compute; + +void NEBitwiseAnd::configure(const ITensor *input1, const ITensor *input2, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEBitwiseNot.cpp b/src/runtime/NEON/functions/NEBitwiseNot.cpp new file mode 100644 index 0000000000..af3df6e46a --- /dev/null +++ b/src/runtime/NEON/functions/NEBitwiseNot.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" + +#include + +using namespace arm_compute; + +void NEBitwiseNot::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEBitwiseOr.cpp b/src/runtime/NEON/functions/NEBitwiseOr.cpp new file mode 100644 index 0000000000..d12c5e5f6f --- /dev/null +++ b/src/runtime/NEON/functions/NEBitwiseOr.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" + +#include + +using namespace arm_compute; + +void NEBitwiseOr::configure(const ITensor *input1, const ITensor *input2, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEBitwiseXor.cpp b/src/runtime/NEON/functions/NEBitwiseXor.cpp new file mode 100644 index 0000000000..65c943e64c --- /dev/null +++ b/src/runtime/NEON/functions/NEBitwiseXor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" + +#include + +using namespace arm_compute; + +void NEBitwiseXor::configure(const ITensor *input1, const ITensor *input2, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEBox3x3.cpp b/src/runtime/NEON/functions/NEBox3x3.cpp new file mode 100644 index 0000000000..7f0b45d34c --- /dev/null +++ b/src/runtime/NEON/functions/NEBox3x3.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) +{ + if(use_fp16) + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + } + else + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + } + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp new file mode 100644 index 0000000000..26f31f557b --- /dev/null +++ b/src/runtime/NEON/functions/NECannyEdge.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2017 ARM Limited. 
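use_fp16 merely selects the F16 kernel variant when the library is built with FP16 support; the interface is otherwise identical. A sketch with replicated borders (tensor names illustrative):

    NEBox3x3 box;
    box.configure(&src, &dst, BorderMode::REPLICATE, 0 /* constant border value, unused here */, false /* use_fp16 */);
    box.run();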
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NECannyEdge.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" +#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include +#include + +using namespace arm_compute; + +NECannyEdge::NECannyEdge() + : _sobel(), _gradient(), _non_max_suppr(), _edge_trace(), _border_mag_gradient(), _border_edge_trace(), _gx(), _gy(), _magnitude(), _phase(), _nonmax(), _output(nullptr) +{ +} + +void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value, + bool use_fp16) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(gradient_size < 3); + ARM_COMPUTE_ERROR_ON(gradient_size > 7); + ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr); + ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type)); + + _output = output; + + const TensorShape &shape = input->info()->tensor_shape(); + TensorInfo gradient_info; + TensorInfo magnitude_info; + + // Initialize images + if(gradient_size < 7) + { + gradient_info.init(shape, Format::S16); + magnitude_info.init(shape, Format::U16); + } + else + { + gradient_info.init(shape, Format::S32); + magnitude_info.init(shape, Format::U32); + } + + _gx.allocator()->init(gradient_info); + _gy.allocator()->init(gradient_info); + _magnitude.allocator()->init(magnitude_info); + + TensorInfo info(shape, Format::U8); + _phase.allocator()->init(info); + _nonmax.allocator()->init(info); + + // Configure/Init sobelNxN + if(gradient_size == 3) + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + } + else if(gradient_size == 5) + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, 
&_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + } + else if(gradient_size == 7) + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + } + else + { + ARM_COMPUTE_ERROR("Gradient size not supported\n"); + } + + // Configure gradient + if(use_fp16) + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type); + _gradient = std::move(k); + } + else + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type); + _gradient = std::move(k); + } + + // Configure non-maxima suppression + _non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); + + // Fill border around magnitude image as non-maxima suppression will access + // it. If border mode is undefined filling the border is a nop. + _border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value); + + // Configure edge tracing + _edge_trace.configure(&_nonmax, output); + + // Fill border with "No edge" to stop recursion in edge trace + _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, 0); + + // Allocate intermediate tensors + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); + _phase.allocator()->allocate(); + _magnitude.allocator()->allocate(); + _nonmax.allocator()->allocate(); +} + +void NECannyEdge::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function"); + ARM_COMPUTE_ERROR_ON(_output == nullptr); + + // Run sobelNxN + _sobel->run(); + + // Fill border before non-maxima suppression. Nop for border mode undefined. + _border_mag_gradient.run(_border_mag_gradient.window()); + + // Run gradient + NEScheduler::get().schedule(_gradient.get(), Window::DimY); + + // Run non-maxima suppression + NEScheduler::get().schedule(&_non_max_suppr, Window::DimY); + + ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); + memset(_output->buffer(), 0, _output->info()->total_size()); + + // Fill border before edge trace + _border_edge_trace.run(_border_edge_trace.window()); + + // Run edge tracing + _edge_trace.run(_edge_trace.window()); +} diff --git a/src/runtime/NEON/functions/NEChannelCombine.cpp b/src/runtime/NEON/functions/NEChannelCombine.cpp new file mode 100644 index 0000000000..84d4fff4ff --- /dev/null +++ b/src/runtime/NEON/functions/NEChannelCombine.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
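The configure() above wires a four-stage pipeline: SobelNxN, gradient magnitude/phase, non-maxima suppression, and hysteresis edge tracing, with border fills around the magnitude and non-maxima images. A caller-side sketch, with thresholds and gradient size chosen purely for illustration:

    NECannyEdge canny;
    canny.configure(&src, &edges,
                    120 /* upper threshold */, 60 /* lower threshold */,
                    3 /* Sobel size */, 1 /* L1 norm */,
                    BorderMode::REPLICATE, 0, false /* use_fp16 */);
    canny.run(); // clears the output, then runs the staged kernels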
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" + +#include + +using namespace arm_compute; + +void NEChannelCombine::configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(plane0, plane1, plane2, plane3, output); + _kernel = std::move(k); +} + +void NEChannelCombine::configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(plane0, plane1, plane2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEChannelExtract.cpp b/src/runtime/NEON/functions/NEChannelExtract.cpp new file mode 100644 index 0000000000..634e918eac --- /dev/null +++ b/src/runtime/NEON/functions/NEChannelExtract.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" + +#include + +using namespace arm_compute; + +void NEChannelExtract::configure(const ITensor *input, Channel channel, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, channel, output); + _kernel = std::move(k); +} + +void NEChannelExtract::configure(const IMultiImage *input, Channel channel, IImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, channel, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEColorConvert.cpp b/src/runtime/NEON/functions/NEColorConvert.cpp new file mode 100644 index 0000000000..bbaa832284 --- /dev/null +++ b/src/runtime/NEON/functions/NEColorConvert.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEColorConvert.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" + +#include + +using namespace arm_compute; + +void NEColorConvert::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void NEColorConvert::configure(const IMultiImage *input, IImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void NEColorConvert::configure(const IImage *input, IMultiImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +void NEColorConvert::configure(const IMultiImage *input, IMultiImage *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp new file mode 100644 index 0000000000..3f39ae2cbd --- /dev/null +++ b/src/runtime/NEON/functions/NEConvolution.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
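The four overloads cover tensor-to-tensor, multi-planar-to-planar and planar-to-multi-planar conversions. A sketch converting an NV12 MultiImage to RGB888, assuming the conversion is one of those supported by NEColorConvertKernel (dimensions illustrative):

    MultiImage nv12;
    nv12.init(640, 480, Format::NV12);
    Tensor rgb;
    rgb.allocator()->init(TensorInfo(640U, 480U, Format::RGB888));

    NEColorConvert convert;
    convert.configure(&nv12, &rgb); // IMultiImage -> IImage overload
    nv12.allocate();
    rgb.allocator()->allocate();
    convert.run();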
+ */ +#include "arm_compute/runtime/NEON/functions/NEConvolution.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include +#include + +using namespace arm_compute; + +void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} + +template +NEConvolutionSquare::NEConvolutionSquare() + : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() +{ +} + +template +void NEConvolutionSquare::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(conv == nullptr); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); + + std::array conv_col{ { 0 } }; + std::array conv_row{ { 0 } }; + + _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size); + + if(_is_separable) + { + DataType intermediate_type = DataType::UNKNOWN; + std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size); + + _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type)); + + if(scale == 0) + { + scale = calculate_matrix_scale(conv, matrix_size); + } + + _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); + _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); + + _tmp.allocator()->allocate(); + + _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + } + else + { + _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value)); + } +} + +template +void NEConvolutionSquare::run() +{ + _border_handler.run(_border_handler.window()); + + if(_is_separable) + { + NEScheduler::get().schedule(&_kernel_hor, Window::DimY); + NEScheduler::get().schedule(&_kernel_vert, Window::DimY); + } + else + { + NEScheduler::get().schedule(&_kernel, Window::DimY); + } +} + +template class arm_compute::NEConvolutionSquare<5>; +template class arm_compute::NEConvolutionSquare<7>; +template class arm_compute::NEConvolutionSquare<9>; + +void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, 
PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp new file mode 100644 index 0000000000..bd688cffb6 --- /dev/null +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include +#include + +using namespace arm_compute; + +NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() + : _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) +{ +} + +void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases); + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + + // Check if bias are present, if yes they will be embedded to the weights matrix + const bool _has_bias = (biases != nullptr); + + _transpose1xW = transpose1xW; + + if(transpose1xW) + { + // Create tensor to store the reshaped weights + const unsigned int mat_weights_cols = weights->info()->dimension(3); + const unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + (_has_bias ? 
1 : 0); + TensorShape shape_wr(mat_weights_cols, mat_weights_rows); + TensorInfo info_wr(shape_wr, 1, weights->info()->data_type(), weights->info()->fixed_point_position()); + + _weights_reshaped.allocator()->init(info_wr); + _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); + _weights_transposed_kernel.configure(&_weights_reshaped, output); + _weights_reshaped.allocator()->allocate(); + } + else + { + _weights_reshape_kernel.configure(weights, biases, output); + } +} + +void NEConvolutionLayerReshapeWeights::run() +{ + NEScheduler::get().schedule(&_weights_reshape_kernel, 3); + if(_transpose1xW) + { + NEScheduler::get().schedule(&_weights_transposed_kernel, Window::DimY); + } +} + +NEConvolutionLayer::NEConvolutionLayer() + : _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), + _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) +{ +} + +void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output); + ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); + ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + _has_bias = (biases != nullptr); + _are_weights_reshaped = weights_info.are_reshaped(); + + // Get parameters from conv_info + unsigned int stride_x = 0; + unsigned int stride_y = 0; + unsigned int pad_x = 0; + unsigned int pad_y = 0; + std::tie(stride_x, stride_y) = conv_info.stride(); + std::tie(pad_x, pad_y) = conv_info.pad(); + + // Get convolved dimensions + unsigned int conv_w = 0; + unsigned int conv_h = 0; + + const unsigned int kernel_width = (_are_weights_reshaped) ? 
weights_info.kernel_size() : weights->info()->dimension(0); + std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, + stride_x, stride_y, pad_x, pad_y, conv_info.round()); + ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); + + // Check if its a "fully connected" convolution + _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); + + unsigned int mat_weights_cols = weights->info()->dimension(3); + unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + (_has_bias ? 1 : 0); + + // Reshape weights if needed + if(_are_weights_reshaped) + { + mat_weights_cols = output->info()->dimension(2); + const unsigned int quarter_reshaped_cols = weights->info()->dimension(0) / 4; + mat_weights_rows = (_has_bias ? 1 + quarter_reshaped_cols : quarter_reshaped_cols); + } + else + { + if(_is_fully_connected_convolution) + { + // Create tensor to store the reshaped weights + TensorShape shape_wr(mat_weights_cols, mat_weights_rows); + TensorInfo info_wr(shape_wr, 1, dt, fixed_point_position); + _weights_reshaped.allocator()->init(info_wr); + _reshape_weights.configure(weights, biases, &_weights_reshaped, false /* 1xW transpose */); + } + else + { + // Create tensor to store transposed weights + const float transpose_width = 16.0f / input->info()->element_size(); + TensorShape shape_wt(mat_weights_rows * static_cast(transpose_width), static_cast(std::ceil(mat_weights_cols / transpose_width))); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _weights_reshaped.allocator()->init(info_wt); + _reshape_weights.configure(weights, biases, &_weights_reshaped, true /* 1xW transpose */); + } + weights = &_weights_reshaped; + } + + // Create tensor to store im2col reshaped inputs + const unsigned int mat_input_cols = mat_weights_rows; + const unsigned int mat_input_rows = conv_w * conv_h; + TensorShape shape_im2col = input->info()->tensor_shape(); + shape_im2col.set(0, mat_input_cols); + shape_im2col.set(1, mat_input_rows); + shape_im2col.set(2, 1); + _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + + // Create tensor (interleave) to prepare input tensor for GEMM + if(!_is_fully_connected_convolution) + { + TensorShape shape_interleaved = shape_im2col; + shape_interleaved.set(0, shape_interleaved.x() * 4); + shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f)); + _input_interleaved_reshaped.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + } + + // Create GEMM output tensor + TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape(); + shape_gemm.set(0, mat_weights_cols); + shape_gemm.set(1, mat_input_rows); + _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position)); + + // Configure kernels + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + if(_is_fully_connected_convolution) + { + _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f); + } + else + { + _input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped); + _mm_kernel.configure(&_input_interleaved_reshaped, weights, &_gemm_output, 1.0f); + } + _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); + + // Allocate 
intermediate tensor + if(!_are_weights_reshaped) + { + _weights_reshaped.allocator()->allocate(); + } + _input_im2col_reshaped.allocator()->allocate(); + if(!_is_fully_connected_convolution) + { + _input_interleaved_reshaped.allocator()->allocate(); + } + _gemm_output.allocator()->allocate(); +} + +void NEConvolutionLayer::run() +{ + // Run weights reshaping (Runs once for every configure) + if(!_are_weights_reshaped) + { + _are_weights_reshaped = true; + _reshape_weights.run(); + } + + // Run input reshaping + NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY); + if(!_is_fully_connected_convolution) + { + // Run interleave + NEScheduler::get().schedule(&_input_interleave_kernel, Window::DimY); + } + + // Runs matrix multiply on reshaped matrices + NEScheduler::get().schedule(&_mm_kernel, Window::DimY); + + // Reshape output matrix + NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp new file mode 100644 index 0000000000..7d2c5494a9 --- /dev/null +++ b/src/runtime/NEON/functions/NEDepthConcatenate.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
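configure() lowers the convolution to a GEMM: the weights are reshaped (and 1xW-transposed unless the convolution degenerates to fully connected), the input is im2col'd and interleaved, multiplied, then col2im'd back into the output. A caller-side sketch for a 3x3, stride 1, pad 1, F32 layer (all shapes illustrative):

    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 8U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));

    NEConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
    // allocate the four tensors, fill src/weights/biases, then:
    conv.run(); // the first run also performs the one-off weights reshape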
+ */ +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEDepthConcatenate::NEDepthConcatenate() + : _inputs_vector(), _concat_kernels_vector(), _border_handlers_vector(), _num_inputs(0) +{ +} + +void NEDepthConcatenate::configure(std::vector inputs_vector, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); + + _num_inputs = inputs_vector.size(); + _concat_kernels_vector = arm_compute::cpp14::make_unique(_num_inputs); + _border_handlers_vector = arm_compute::cpp14::make_unique(_num_inputs); + + unsigned int depth_offset = 0; + for(unsigned int i = 0; i < _num_inputs; ++i) + { + _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); + _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0)); + + depth_offset += inputs_vector.at(i)->info()->dimension(2); + } +} + +void NEDepthConcatenate::run() +{ + for(unsigned i = 0; i < _num_inputs; ++i) + { + NEScheduler::get().schedule(&_border_handlers_vector[i], Window::DimX); + NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimX); + } +} diff --git a/src/runtime/NEON/functions/NEDepthConvert.cpp b/src/runtime/NEON/functions/NEDepthConvert.cpp new file mode 100644 index 0000000000..a339cae316 --- /dev/null +++ b/src/runtime/NEON/functions/NEDepthConvert.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
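A sketch concatenating two F32 tensors along the depth axis; the output must be deep enough to hold every input (shapes illustrative):

    Tensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(64U, 64U, 8U), 1, DataType::F32));
    b.allocator()->init(TensorInfo(TensorShape(64U, 64U, 24U), 1, DataType::F32));
    out.allocator()->init(TensorInfo(TensorShape(64U, 64U, 32U), 1, DataType::F32));

    NEDepthConcatenate concat;
    concat.configure({ &a, &b }, &out); // vector of at least two inputs
    // allocate and fill a and b, then:
    concat.run();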
+ */ +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" + +#include + +using namespace arm_compute; + +void NEDepthConvert::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON(input == output); + ARM_COMPUTE_ERROR_ON(input->info()->data_type() == output->info()->data_type()); + + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, policy, shift); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp new file mode 100644 index 0000000000..2887c13233 --- /dev/null +++ b/src/runtime/NEON/functions/NEDerivative.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEDerivative.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEDerivative::NEDerivative() + : _kernel(), _border_handler() +{ +} + +void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); + + _kernel.configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _border_handler.configure(input, 1, border_mode, PixelValue(constant_border_value)); +} + +void NEDerivative::run() +{ + _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEDilate.cpp b/src/runtime/NEON/functions/NEDilate.cpp new file mode 100644 index 0000000000..0c016f14f9 --- /dev/null +++ b/src/runtime/NEON/functions/NEDilate.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEDilate.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NEDilate::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp new file mode 100644 index 0000000000..3f3e7710fb --- /dev/null +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include +#include + +using namespace arm_compute; + +NEDirectConvolutionLayer::NEDirectConvolutionLayer() + : _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator() +{ +} + +void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32); + + // Free accumulator + if(_accumulator.buffer() != nullptr) + { + _accumulator.allocator()->free(); + } + + // Allocate the intermediate accumulator tensor in case of fixed point input + if(output->info()->data_type() == DataType::QS8) + { + _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::QS16, output->info()->fixed_point_position())); + _conv_kernel.configure(input, weights, &_accumulator, conv_info); + _accumulate_bias_kernel.configure(&_accumulator, bias, output); + _accumulator.allocator()->allocate(); + } + else + { + _conv_kernel.configure(input, weights, output, conv_info); + _accumulate_bias_kernel.configure(output, bias); + } + + // Add zero padding XY + _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); +} + +void NEDirectConvolutionLayer::run() +{ + _input_border_handler.run(_input_border_handler.window()); + + NEScheduler::get().schedule(&_conv_kernel, Window::DimZ); + NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp new file mode 100644 index 0000000000..f6ec677e44 --- /dev/null +++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
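// Usage sketch (not taken from the patch): driving NEDirectConvolutionLayer with an
// assumed 32x32x3 F32 input, a 1x1x3x8 weight set, stride 1 and no padding. All
// names and shapes below are illustrative assumptions.
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_direct_convolution()
{
    Tensor src{}, weights{}, bias{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(1U, 1U, 3U, 8U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 8U), 1, DataType::F32));

    NEDirectConvolutionLayer conv{};
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0));

    // Backing memory is allocated after configure(), then the tensors are filled.
    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    conv.run();
}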
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEEqualizeHistogram::NEEqualizeHistogram() + : _histogram_kernel(), _cd_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8) +{ +} + +void NEEqualizeHistogram::configure(const IImage *input, IImage *output) +{ + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + + // Configure kernels + _histogram_kernel.configure(input, &_hist); + _cd_histogram_kernel.configure(input, &_hist, &_cum_dist, &_cd_lut); + _map_histogram_kernel.configure(input, &_cd_lut, output); +} + +void NEEqualizeHistogram::run() +{ + // Calculate histogram of input. + NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); + + // Calculate cumulative distribution of histogram and create LUT. + _cd_histogram_kernel.run(_cd_histogram_kernel.window()); + + // Map input to output using created LUT. + NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEErode.cpp b/src/runtime/NEON/functions/NEErode.cpp new file mode 100644 index 0000000000..9b011db845 --- /dev/null +++ b/src/runtime/NEON/functions/NEErode.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
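// Usage sketch (assumed 640x480 U8 images): NEEqualizeHistogram only needs the input
// and output images; the histogram, cumulative distribution and LUT seen above are
// managed internally by the function.
#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_equalize_histogram()
{
    Tensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));
    dst.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));

    NEEqualizeHistogram equalize{};
    equalize.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src with image data ...
    equalize.run();
}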
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEErode.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp new file mode 100644 index 0000000000..33a58f1904 --- /dev/null +++ b/src/runtime/NEON/functions/NEFastCorners.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
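// Usage sketch for the simple one-kernel wrappers above (NEDilate and NEErode follow
// the same pattern). Image size and border mode are assumptions for the example.
#include "arm_compute/runtime/NEON/functions/NEErode.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_erode()
{
    Tensor src{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(320U, 240U), Format::U8));
    dst.allocator()->init(TensorInfo(TensorShape(320U, 240U), Format::U8));

    NEErode erode{};
    erode.configure(&src, &dst, BorderMode::REPLICATE, 0);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    erode.run(); // fills the input border, then schedules the erode kernel
}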
+ */ +#include "arm_compute/runtime/NEON/functions/NEFastCorners.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +NEFastCorners::NEFastCorners() + : _fast_corners_kernel(), + _border_handler(), + _nonmax_kernel(), + _fill_kernel(), + _output(), + _suppressed(), + _non_max(false) +{ +} + +void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_ERROR_ON(BorderMode::UNDEFINED != border_mode); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == corners); + ARM_COMPUTE_ERROR_ON(threshold < 1 && threshold > 255); + + _non_max = nonmax_suppression; + + TensorInfo tensor_info(input->info()->tensor_shape(), Format::U8); + _output.allocator()->init(tensor_info); + + // If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3, + // width - 3) and ywindow (3, height -3) so the output image will leave the + // pixels on the borders unchanged. This is reflected in the valid region + // of the output. The non maxima suppression is only run on the valid + // pixels. + _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode); + _border_handler.configure(input, _fast_corners_kernel.border_size(), border_mode, constant_border_value); + + if(!_non_max) + { + _fill_kernel.configure(&_output, 1 /* we keep all texels >0 */, corners); + } + else + { + _suppressed.allocator()->init(tensor_info); + _nonmax_kernel.configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode); + _fill_kernel.configure(&_suppressed, 1 /* we keep all texels >0 */, corners); + + // Allocate intermediate tensors + _suppressed.allocator()->allocate(); + } + + // Allocate intermediate tensors + _output.allocator()->allocate(); +} + +void NEFastCorners::run() +{ + _border_handler.run(_border_handler.window()); + + NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY); + + if(_non_max) + { + NEScheduler::get().schedule(&_nonmax_kernel, Window::DimY); + } + + NEScheduler::get().schedule(&_fill_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp new file mode 100644 index 0000000000..e884f4a668 --- /dev/null +++ b/src/runtime/NEON/functions/NEFillBorder.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEFillBorder.h" + +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value) +{ + _border_handler.configure(input, border_width, border_mode, constant_border_value); +} + +void NEFillBorder::run() +{ + NEScheduler::get().schedule(&_border_handler, Window::DimZ); +} diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp new file mode 100644 index 0000000000..abb41e9f70 --- /dev/null +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" + +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include +#include + +using namespace arm_compute; + +NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights() + : _transpose_kernel(), _transpose1xW_kernel(), _transpose_output(), _transpose_weights(false), _is_batched_fc_layer(false) +{ +} + +void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON(output == nullptr); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2); + ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false)); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + _transpose_weights = transpose_weights; + _is_batched_fc_layer = is_batched_fc_layer; + + // Check if we need to transpose the weights + if(_transpose_weights) + { + if(_is_batched_fc_layer) + { + // Initialize the output tensor for transpose + TensorShape shape_transposed(input->info()->dimension(1), input->info()->dimension(0)); + _transpose_output.allocator()->init(TensorInfo(shape_transposed, 1, dt, fixed_point_position)); + _transpose_kernel.configure(input, &_transpose_output); + + // Configure transpose 1xW kernel + _transpose1xW_kernel.configure(&_transpose_output, output); + + // Allocate temporary tensor used for transposing the weights + _transpose_output.allocator()->allocate(); + } + else + { + _transpose_kernel.configure(input, output); + } + } + else + { + if(_is_batched_fc_layer) + { + // Configure transpose 1xW kernel + _transpose1xW_kernel.configure(input, output); + } + else + { + ARM_COMPUTE_ERROR("Configuration transpose_weights=false & is_batched_fc_layer=false not supported"); + } + } +} + +void NEFullyConnectedLayerReshapeWeights::run() +{ + if(_transpose_weights) + { + NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + } + if(_is_batched_fc_layer) + { + NEScheduler::get().schedule(&_transpose1xW_kernel, Window::DimY); + } +} + +NEFullyConnectedLayer::NEFullyConnectedLayer() + : _im2col_kernel(), _reshape_weights_kernel(), _interleave4x4_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _interleave4x4_output(), _reshape_weights_output(), + _are_weights_reshaped(false), _is_fc_after_conv(false), _is_batched_fc_layer(false), _accumulate_biases(false) +{ +} + +void NEFullyConnectedLayer::configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2) * (16 / weights->info()->element_size()))); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + // If the fully connected layer is called after a convolution layer, the input tensor must be linearized + + // Initialize output tensor for im2col + TensorShape shape_im2col; + shape_im2col.set(0, input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)); + shape_im2col.set(1, input->info()->dimension(3)); + shape_im2col.set(2, input->info()->dimension(4)); + shape_im2col.set(3, input->info()->dimension(5)); + 
_im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + + // Initialize output tensor for interleave 4x4 + TensorShape shape_interleaved = _im2col_output.info()->tensor_shape(); + shape_interleaved.set(0, shape_interleaved.x() * 4); + shape_interleaved.set(1, std::ceil(static_cast(shape_interleaved.y()) / 4)); + _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + + // Configure im2col kernel + _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + + // Configure interleave4x4 kernel + _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_interleave4x4_output, weights, output, 1.0f); + + // Allocate the tensors once all the configure methods have been called + _im2col_output.allocator()->allocate(); + _interleave4x4_output.allocator()->allocate(); +} + +void NEFullyConnectedLayer::configure_fc_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output) +{ + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + // Initialize output tensor for interleave 4x4 + TensorShape shape_interleaved = input->info()->tensor_shape(); + shape_interleaved.set(0, shape_interleaved.x() * 4); + shape_interleaved.set(1, std::ceil(static_cast(shape_interleaved.y()) / 4)); + _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); + + // Configure interleave4x4 kernel + _interleave4x4_kernel.configure(input, &_interleave4x4_output); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_interleave4x4_output, weights, output, 1.0f); + + // Allocate the tensors once all the configure methods have been called + _interleave4x4_output.allocator()->allocate(); +} + +void NEFullyConnectedLayer::configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + // If the fully connected layer is called after a convolution layer, the input tensor must be linearized + + // Initialize output tensor for im2col + TensorShape shape_im2col; + shape_im2col.set(0, input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)); + shape_im2col.set(1, 1); + _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); + + // Configure im2col kernel + _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_im2col_output, weights, output, 1.0f); + + // Allocate the output tensor for im2col once all the configure methods have been called + _im2col_output.allocator()->allocate(); +} + +void NEFullyConnectedLayer::configure_fc_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); + + // Configure matrix multiply kernel + _mm_kernel.configure(input, weights, output, 1.0f); +} + +void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, 
bool are_weights_reshaped) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2); + + const DataType dt = input->info()->data_type(); + const int fixed_point_position = input->info()->fixed_point_position(); + + _are_weights_reshaped = are_weights_reshaped; + _is_fc_after_conv = true; + _is_batched_fc_layer = false; + _accumulate_biases = false; + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + + _accumulate_biases = true; + + // Configure accumulate biases kernel + _accumulate_biases_kernel.configure(output, biases); + } + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + // Check if we have a fully connected layer with batches + _is_batched_fc_layer = (output->info()->dimension(1) > 1); + + const ITensor *weights_to_use = weights; + + if(!are_weights_reshaped) + { + if((transpose_weights || _is_batched_fc_layer)) + { + weights_to_use = &_reshape_weights_output; + + if(transpose_weights) + { + if(_is_batched_fc_layer) + { + const float transpose_width = 16.0f / input->info()->element_size(); + TensorShape shape_wt(weights->info()->dimension(0) * static_cast(transpose_width), static_cast(std::ceil(weights->info()->dimension(1) / transpose_width))); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _reshape_weights_output.allocator()->init(info_wt); + } + else + { + TensorShape shape_wt(weights->info()->dimension(1), weights->info()->dimension(0)); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _reshape_weights_output.allocator()->init(info_wt); + } + } + else + { + ARM_COMPUTE_ERROR_ON(!_is_batched_fc_layer); + + const float transpose_width = 16.0f / input->info()->element_size(); + TensorShape shape_wt(weights->info()->dimension(1) * static_cast(transpose_width), static_cast(std::ceil(weights->info()->dimension(0) / transpose_width))); + TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position); + _reshape_weights_output.allocator()->init(info_wt); + } + + // Reshape the weights + _reshape_weights_kernel.configure(weights, &_reshape_weights_output, transpose_weights, _is_batched_fc_layer); + } + } + + if(_is_batched_fc_layer) + { + _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); + + if(_is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer with batches + configure_conv_fc_wb(input, weights_to_use, output); + } + else + { + // Fully Connected layer after a Fully Connected Layer with batches + configure_fc_fc_wb(input, weights_to_use, output); + } + } + else + { + // In case of not batched fully connected layer, the weights will not be reshaped using transposed1xW + _is_fc_after_conv = ((weights_to_use->info()->dimension(1)) == (input->info()->dimension(0) * input->info()->dimension(1) * 
input->info()->dimension(2))); + + if(_is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer without batches + configure_conv_fc_nb(input, weights_to_use, output); + } + else + { + // Fully Connected layer after a Fully Connected Layer without batches + configure_fc_fc_nb(input, weights_to_use, output); + } + } + + // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called + if(!are_weights_reshaped) + { + if(transpose_weights || _is_batched_fc_layer) + { + // Allocate the tensor for the weights reshaped + _reshape_weights_output.allocator()->allocate(); + } + } +} + +void NEFullyConnectedLayer::run() +{ + // Reshape of the weights (happens only once) + if(!_are_weights_reshaped) + { + _are_weights_reshaped = true; + _reshape_weights_kernel.run(); + } + + // Linearize input if comes from a convolutional layer + if(_is_fc_after_conv) + { + NEScheduler::get().schedule(&_im2col_kernel, Window::DimY); + } + + // Interleave input + if(_is_batched_fc_layer) + { + NEScheduler::get().schedule(&_interleave4x4_kernel, Window::DimY); + } + + // Run matrix multiply + NEScheduler::get().schedule(&_mm_kernel, _is_batched_fc_layer ? Window::DimY : Window::DimX); + + // Accumulate biases if provided + if(_accumulate_biases) + { + NEScheduler::get().schedule(&_accumulate_biases_kernel, Window::DimY); + } +} diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp new file mode 100644 index 0000000000..15d5f4effb --- /dev/null +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
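// Usage sketch for a not-batched fully connected layer (assumed sizes: 128 inputs,
// 10 outputs, F32). The weights are passed untransposed and unreshaped, so the
// reshape kernel configured above runs once on the first call to run().
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_fully_connected()
{
    Tensor src{}, weights{}, bias{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(128U, 10U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32));

    NEFullyConnectedLayer fc{};
    fc.configure(&src, &weights, &bias, &dst, true /* transpose_weights */, false /* are_weights_reshaped */);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src, weights and bias ...
    fc.run(); // reshapes the weights once, then runs im2col, matrix multiply and bias accumulation
}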
+ */ +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include + +using namespace arm_compute; + +NEGEMM::NEGEMM() + : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _run_vector_matrix_multiplication(false), _run_addition(false) +{ +} + +void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16, DataType::QS8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::F32, DataType::F16, DataType::QS8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(d, 1, DataType::F32, DataType::F16, DataType::QS8); + + if(c != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(c, 1, DataType::F32, DataType::F16, DataType::QS8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A"); + ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix B"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(0) != d->info()->dimension(0), "The C matrix must have the same number of rows as the output matrix"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != d->info()->dimension(1), "The C matrix must have the same number of columns as the output matrix"); + } + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, d); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + + // Check if the first input tensor is a vector. 
If so, all the kernels for reshaping the tensors can be skipped + if((a->info()->dimension(1) == 1)) + { + _run_vector_matrix_multiplication = true; + + // Configure the matrix multiply kernel + _mm_kernel.configure(a, b, d, alpha); + } + else + { + _run_vector_matrix_multiplication = false; + + TensorShape shape_tmp_a = a->info()->tensor_shape(); + TensorShape shape_tmp_b = b->info()->tensor_shape(); + + shape_tmp_a.set(0, a->info()->dimension(0) * 4); + shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.0f)); + + switch(a->info()->data_type()) + { + case DataType::F32: + { + shape_tmp_b.set(0, b->info()->dimension(1) * 4); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 4.0f)); + break; + } + case DataType::F16: +#ifdef ARM_COMPUTE_ENABLE_FP16 + { + shape_tmp_b.set(0, b->info()->dimension(1) * 8); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 8.0f)); + break; + } +#endif + case DataType::QS8: + { + shape_tmp_b.set(0, b->info()->dimension(1) * 16); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 16.0f)); + break; + } + default: + { + ARM_COMPUTE_ERROR_ON("Data type not supported"); + } + } + + TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position()); + TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), a->info()->fixed_point_position()); + + _tmp_a.allocator()->init(info_a); + _tmp_b.allocator()->init(info_b); + + // Configure interleave kernel + _interleave_kernel.configure(a, &_tmp_a); + + // Configure transpose kernel + _transpose_kernel.configure(b, &_tmp_b); + + // Configure matrix multiplication kernel + _mm_kernel.configure(&_tmp_a, &_tmp_b, d, alpha); + + // Allocate once the all configure methods have been called + _tmp_a.allocator()->allocate(); + _tmp_b.allocator()->allocate(); + } + + // Configure matrix addition kernel + if(beta != 0 && c != nullptr) + { + _ma_kernel.configure(c, d, beta); + _run_addition = true; + } +} + +void NEGEMM::run() +{ + if(!_run_vector_matrix_multiplication) + { + // Run interleave kernel + NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); + + // Run transpose kernel + NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + } + + // Run matrix multiply kernel + NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); + + // Run matrix addition kernel + if(_run_addition) + { + NEScheduler::get().schedule(&_ma_kernel, Window::DimY); + } +} diff --git a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp new file mode 100644 index 0000000000..4c77c88656 --- /dev/null +++ b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
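// Usage sketch mirroring the configure()/run() contract of NEGEMM above. Shapes and
// the F32 data type are assumptions; the C matrix is omitted, so beta is unused.
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_gemm()
{
    Tensor a{}, b{}, d{};
    a.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32)); // A: 64 columns, 32 rows
    b.allocator()->init(TensorInfo(TensorShape(16U, 64U), 1, DataType::F32)); // B: 16 columns, 64 rows
    d.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32)); // D = alpha * A * B

    NEGEMM gemm{};
    gemm.configure(&a, &b, nullptr, &d, 1.f, 0.f);

    a.allocator()->allocate();
    b.allocator()->allocate();
    d.allocator()->allocate();

    // ... fill a and b ...
    gemm.run();
}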
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" + +using namespace arm_compute; + +void NEGEMMInterleave4x4::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEGEMMLowp.cpp b/src/runtime/NEON/functions/NEGEMMLowp.cpp new file mode 100644 index 0000000000..b64f769459 --- /dev/null +++ b/src/runtime/NEON/functions/NEGEMMLowp.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +NEGEMMLowp::NEGEMMLowp() + : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _tmp_a(), _tmp_b() +{ +} + +void NEGEMMLowp::configure(const ITensor *a, const ITensor *b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != output->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A"); + ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != output->info()->dimension(0), "The C matrix must have the same number of columns as the matrix C"); + + /* The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ] */ + TensorShape shape_tmp_a = a->info()->tensor_shape(); + shape_tmp_a.set(0, a->info()->dimension(0) * 4); + shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.f)); + + TensorShape shape_tmp_b = b->info()->tensor_shape(); + shape_tmp_b.set(0, b->info()->dimension(1) * 16); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 16.f)); + + TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type()); + TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type()); + _tmp_a.allocator()->init(info_a); + _tmp_b.allocator()->init(info_b); + + _interleave_kernel.configure(a, &_tmp_a); + _transpose_kernel.configure(b, &_tmp_b); + _mm_kernel.configure(&_tmp_a, &_tmp_b, output, a_offset, b_offset, output_offset, output_mult_int, shift); + + _tmp_a.allocator()->allocate(); + _tmp_b.allocator()->allocate(); +} + +void NEGEMMLowp::run() +{ + /* Run interleave kernel */ + NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); + + /* Run transpose kernel */ + NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + + /* Run matrix multiply kernel */ + NEScheduler::get().schedule(&_mm_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp new file mode 100644 index 0000000000..dc40ecec14 --- /dev/null +++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" + +using namespace arm_compute; + +void NEGEMMTranspose1xW::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEGaussian3x3.cpp b/src/runtime/NEON/functions/NEGaussian3x3.cpp new file mode 100644 index 0000000000..95ba5cbdf9 --- /dev/null +++ b/src/runtime/NEON/functions/NEGaussian3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp new file mode 100644 index 0000000000..5ccc765966 --- /dev/null +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +NEGaussian5x5::NEGaussian5x5() + : _kernel_hor(), _kernel_vert(), _tmp(), _border_handler() +{ +} + +void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + // Init temporary buffer + TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16); + _tmp.allocator()->init(tensor_info); + + // Create and configure kernels for the two passes + _kernel_hor.configure(input, &_tmp, border_mode == BorderMode::UNDEFINED); + _kernel_vert.configure(&_tmp, output, border_mode == BorderMode::UNDEFINED); + + _tmp.allocator()->allocate(); + + _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); +} + +void NEGaussian5x5::run() +{ + _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_kernel_hor, Window::DimY); + NEScheduler::get().schedule(&_kernel_vert, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp new file mode 100644 index 0000000000..e1d64f11f6 --- /dev/null +++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" +#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/Pyramid.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include + +using namespace arm_compute; + +NEGaussianPyramid::NEGaussianPyramid() + : _input(nullptr), _pyramid(nullptr), _tmp() +{ +} + +NEGaussianPyramidHalf::NEGaussianPyramidHalf() + : _border_handler(), _horizontal_reduction(), _vertical_reduction() +{ +} + +void NEGaussianPyramidHalf::configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_HALF != pyramid->info()->scale()); + + /* Get number of pyramid levels */ + const size_t num_levels = pyramid->info()->num_levels(); + + _input = input; + _pyramid = pyramid; + + if(num_levels > 1) + { + _border_handler = arm_compute::cpp14::make_unique(num_levels - 1); + _horizontal_reduction = arm_compute::cpp14::make_unique(num_levels - 1); + _vertical_reduction = arm_compute::cpp14::make_unique(num_levels - 1); + + // Apply half scale to the X dimension of the tensor shape + TensorShape tensor_shape = pyramid->info()->tensor_shape(); + tensor_shape.set(0, (pyramid->info()->width() + 1) * SCALE_PYRAMID_HALF); + + PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_HALF, tensor_shape, Format::S16); + _tmp.init(pyramid_info); + + for(unsigned int i = 0; i < num_levels - 1; ++i) + { + /* Configure horizontal kernel */ + _horizontal_reduction[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode == BorderMode::UNDEFINED); + + /* Configure vertical kernel */ + _vertical_reduction[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), border_mode == BorderMode::UNDEFINED); + + /* Configure border */ + _border_handler[i].configure(_pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value)); + } + + _tmp.allocate(); + } +} + +void NEGaussianPyramidHalf::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function"); + + /* Get number of pyramid levels */ + const size_t num_levels = _pyramid->info()->num_levels(); + + /* The first level of the pyramid has the input image */ + _pyramid->get_pyramid_level(0)->copy_from(*_input); + + for(unsigned int i = 0; i < num_levels - 1; ++i) + { + _border_handler[i].run(_border_handler[i].window()); + NEScheduler::get().schedule(_horizontal_reduction.get() + i, Window::DimY); + NEScheduler::get().schedule(_vertical_reduction.get() + i, Window::DimY); + } +} + +NEGaussianPyramidOrb::NEGaussianPyramidOrb() + : _offsets(), _gaus5x5(), 
_scale_nearest() +{ +} + +void NEGaussianPyramidOrb::configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_ORB != pyramid->info()->scale()); + + /* Get number of pyramid levels */ + const size_t num_levels = pyramid->info()->num_levels(); + + _input = input; + _pyramid = pyramid; + + if(num_levels > 1) + { + _gaus5x5 = arm_compute::cpp14::make_unique(num_levels - 1); + _scale_nearest = arm_compute::cpp14::make_unique(num_levels - 1); + _offsets = arm_compute::cpp14::make_unique(num_levels - 1); + + PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8); + _tmp.init(pyramid_info); + + for(unsigned int i = 0; i < num_levels - 1; ++i) + { + const size_t width = _pyramid->get_pyramid_level(i + 1)->info()->dimension(0); + const size_t height = _pyramid->get_pyramid_level(i + 1)->info()->dimension(1); + + /* Allocate Image for the offsets used by NEAREST interpolation */ + TensorInfo tensor_info(TensorShape(width, height), Format::S32); + _offsets[i].allocator()->init(tensor_info); + + /* Configure gaussian 5x5 */ + _gaus5x5[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value); + + /* Configure scale image kernel */ + _scale_nearest[i].configure(_tmp.get_pyramid_level(i), nullptr, nullptr, _offsets.get() + i, _pyramid->get_pyramid_level(i + 1), InterpolationPolicy::NEAREST_NEIGHBOR, + border_mode == BorderMode::UNDEFINED); + + _offsets[i].allocator()->allocate(); + } + + _tmp.allocate(); + } +} + +void NEGaussianPyramidOrb::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function"); + + /* Get number of pyramid levels */ + const size_t num_levels = _pyramid->info()->num_levels(); + + /* The first level of the pyramid has the input image */ + _pyramid->get_pyramid_level(0)->copy_from(*_input); + + for(unsigned int i = 0; i < num_levels - 1; ++i) + { + _gaus5x5[i].run(); + NEScheduler::get().schedule(_scale_nearest.get() + i, Window::DimY); + } +} diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp new file mode 100644 index 0000000000..a592f53d44 --- /dev/null +++ b/src/runtime/NEON/functions/NEHOGDescriptor.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
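// Usage sketch (image size and level count assumed): building a 4-level half-scale
// pyramid from a U8 image. run() copies the input into level 0 and then produces the
// remaining levels.
#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
#include "arm_compute/runtime/Pyramid.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_gaussian_pyramid_half()
{
    Tensor src{};
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));

    Pyramid pyramid{};
    pyramid.init(PyramidInfo(4 /* num_levels */, SCALE_PYRAMID_HALF, TensorShape(640U, 480U), Format::U8));

    NEGaussianPyramidHalf gauss_pyr{};
    gauss_pyr.configure(&src, &pyramid, BorderMode::CONSTANT, 0);

    src.allocator()->allocate();
    pyramid.allocate();
    gauss_pyr.run();
}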
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/HOGInfo.h" +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEHOGDescriptor::NEHOGDescriptor() + : _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space() +{ +} + +void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == output); + ARM_COMPUTE_ERROR_ON(nullptr == hog); + + const HOGInfo *hog_info = hog->info(); + const size_t width = input->info()->dimension(Window::DimX); + const size_t height = input->info()->dimension(Window::DimY); + const size_t num_bins = hog_info->num_bins(); + + Size2D cell_size = hog_info->cell_size(); + + // Calculate number of cells along the x and y directions for the hog_space + const size_t num_cells_x = width / cell_size.width; + const size_t num_cells_y = height / cell_size.height; + + // TensorShape of the input image + const TensorShape &shape_img = input->info()->tensor_shape(); + + // TensorShape of the hog space + TensorShape shape_hog_space = input->info()->tensor_shape(); + shape_hog_space.set(Window::DimX, num_cells_x); + shape_hog_space.set(Window::DimY, num_cells_y); + + // Allocate memory for magnitude, phase and hog space + TensorInfo info_mag(shape_img, Format::S16); + _mag.allocator()->init(info_mag); + + TensorInfo info_phase(shape_img, Format::U8); + _phase.allocator()->init(info_phase); + + TensorInfo info_space(shape_hog_space, num_bins, DataType::F32); + _hog_space.allocator()->init(info_space); + + // Initialise gradient kernel + _gradient.configure(input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value); + + // Initialise orientation binning kernel + _orient_bin.configure(&_mag, &_phase, &_hog_space, hog->info()); + + // Initialize HOG norm kernel + _block_norm.configure(&_hog_space, output, hog->info()); + + // Allocate intermediate tensors + _mag.allocator()->allocate(); + _phase.allocator()->allocate(); + _hog_space.allocator()->allocate(); +} + +void NEHOGDescriptor::run() +{ + // Run gradient + _gradient.run(); + + // Run orientation binning kernel + NEScheduler::get().schedule(&_orient_bin, Window::DimY); + + // Run block normalization kernel + NEScheduler::get().schedule(&_block_norm, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEHOGDetector.cpp b/src/runtime/NEON/functions/NEHOGDetector.cpp new file mode 100644 index 0000000000..e8ed29d0b9 --- /dev/null +++ b/src/runtime/NEON/functions/NEHOGDetector.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
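// Usage sketch (all geometry assumed): a 64x128 U8 image with 8x8 cells, 16x16 blocks,
// an 8x8 block stride and 9 bins gives a 7x15 grid of blocks with 36 values per block
// (9 bins x 4 cells), hence the F32 descriptor tensor below. The HOGInfo parameters
// are illustrative assumptions only.
#include "arm_compute/runtime/HOG.h"
#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_hog_descriptor()
{
    HOG hog{};
    hog.init(HOGInfo(Size2D(8, 8), Size2D(16, 16), Size2D(64, 128), Size2D(8, 8), 9));

    Tensor src{}, descriptor{};
    src.allocator()->init(TensorInfo(TensorShape(64U, 128U), Format::U8));
    descriptor.allocator()->init(TensorInfo(TensorShape(7U, 15U), 36, DataType::F32));

    NEHOGDescriptor hog_descriptor{};
    hog_descriptor.configure(&src, &descriptor, &hog, BorderMode::CONSTANT, 0);

    src.allocator()->allocate();
    descriptor.allocator()->allocate();
    hog_descriptor.run();
}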
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" + +using namespace arm_compute; + +void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class); + _kernel = std::move(k); +} \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp new file mode 100644 index 0000000000..2f4b8802e3 --- /dev/null +++ b/src/runtime/NEON/functions/NEHOGGradient.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEHOGGradient::NEHOGGradient() + : _derivative(), _mag_phase(nullptr), _gx(), _gy() +{ +} + +void NEHOGGradient::configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_magnitude, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_phase, 1, DataType::U8); + + const TensorShape &shape_img = input->info()->tensor_shape(); + + // Allocate image memory + TensorInfo info(shape_img, Format::S16); + _gx.allocator()->init(info); + _gy.allocator()->init(info); + + // Initialise derivate kernel + _derivative.configure(input, &_gx, &_gy, border_mode, constant_border_value); + + // Initialise magnitude/phase kernel + if(PhaseType::UNSIGNED == phase_type) + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, output_magnitude, output_phase); + _mag_phase = std::move(k); + } + else + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, output_magnitude, output_phase); + _mag_phase = std::move(k); + } + + // Allocate intermediate tensors + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); +} + +void NEHOGGradient::run() +{ + // Run derivative + _derivative.run(); + + // Run magnitude/phase kernel + NEScheduler::get().schedule(_mag_phase.get(), Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp new file mode 100644 index 0000000000..173b8f4c42 --- /dev/null +++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/Tensor.h" + +using namespace arm_compute; + +NEHOGMultiDetection::NEHOGMultiDetection() + : _gradient_kernel(), _orient_bin_kernel(), _block_norm_kernel(), _hog_detect_kernel(), _non_maxima_kernel(), _hog_space(), _hog_norm_space(), _detection_windows(), _mag(), _phase(), + _non_maxima_suppression(false), _num_orient_bin_kernel(0), _num_block_norm_kernel(0), _num_hog_detect_kernel(0) +{ +} + +void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode, + uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(multi_hog); + ARM_COMPUTE_ERROR_ON(nullptr == detection_windows); + ARM_COMPUTE_ERROR_ON(detection_window_strides->num_values() != multi_hog->num_models()); + + const size_t width = input->info()->dimension(Window::DimX); + const size_t height = input->info()->dimension(Window::DimY); + const TensorShape &shape_img = input->info()->tensor_shape(); + const size_t num_models = multi_hog->num_models(); + PhaseType phase_type = multi_hog->model(0)->info()->phase_type(); + + size_t prev_num_bins = multi_hog->model(0)->info()->num_bins(); + Size2D prev_cell_size = multi_hog->model(0)->info()->cell_size(); + Size2D prev_block_size = multi_hog->model(0)->info()->block_size(); + Size2D prev_block_stride = multi_hog->model(0)->info()->block_stride(); + + /* Check if NEHOGOrientationBinningKernel and NEHOGBlockNormalizationKernel kernels can be skipped for a specific HOG data-object + * + * 1) NEHOGOrientationBinningKernel and NEHOGBlockNormalizationKernel are skipped if the cell size and the number of bins don't change. + * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th + * 2) NEHOGBlockNormalizationKernel is skipped if the cell size, the number of bins and block size do not change. + * Since "multi_hog" is sorted,it is enough to check the HOG descriptors at level "ith" and level "(i-1)th + * + * @note Since the orientation binning and block normalization kernels can be skipped, we need to keep track of the input to process for each kernel + * with "input_orient_bin", "input_hog_detect" and "input_block_norm" + */ + std::vector input_orient_bin; + std::vector input_hog_detect; + std::vector> input_block_norm; + + input_orient_bin.push_back(0); + input_hog_detect.push_back(0); + input_block_norm.emplace_back(0, 0); + + for(size_t i = 1; i < num_models; ++i) + { + size_t cur_num_bins = multi_hog->model(i)->info()->num_bins(); + Size2D cur_cell_size = multi_hog->model(i)->info()->cell_size(); + Size2D cur_block_size = multi_hog->model(i)->info()->block_size(); + Size2D cur_block_stride = multi_hog->model(i)->info()->block_stride(); + + if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height)) + { + prev_num_bins = cur_num_bins; + prev_cell_size = cur_cell_size; + prev_block_size = cur_block_size; + prev_block_stride = cur_block_stride; + + // Compute orientation binning and block normalization kernels. 
Update input to process + input_orient_bin.push_back(i); + input_block_norm.emplace_back(i, input_orient_bin.size() - 1); + } + else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width) + || (cur_block_stride.height != prev_block_stride.height)) + { + prev_block_size = cur_block_size; + prev_block_stride = cur_block_stride; + + // Compute block normalization kernel. Update input to process + input_block_norm.emplace_back(i, input_orient_bin.size() - 1); + } + + // Update input to process for hog detector kernel + input_hog_detect.push_back(input_block_norm.size() - 1); + } + + _detection_windows = detection_windows; + _non_maxima_suppression = non_maxima_suppression; + _num_orient_bin_kernel = input_orient_bin.size(); // Number of NEHOGOrientationBinningKernel kernels to compute + _num_block_norm_kernel = input_block_norm.size(); // Number of NEHOGBlockNormalizationKernel kernels to compute + _num_hog_detect_kernel = input_hog_detect.size(); // Number of NEHOGDetector functions to compute + + _orient_bin_kernel = arm_compute::cpp14::make_unique(_num_orient_bin_kernel); + _block_norm_kernel = arm_compute::cpp14::make_unique(_num_block_norm_kernel); + _hog_detect_kernel = arm_compute::cpp14::make_unique(_num_hog_detect_kernel); + _non_maxima_kernel = arm_compute::cpp14::make_unique(); + _hog_space = arm_compute::cpp14::make_unique(_num_orient_bin_kernel); + _hog_norm_space = arm_compute::cpp14::make_unique(_num_block_norm_kernel); + + // Allocate tensors for magnitude and phase + TensorInfo info_mag(shape_img, Format::S16); + _mag.allocator()->init(info_mag); + + TensorInfo info_phase(shape_img, Format::U8); + _phase.allocator()->init(info_phase); + + // Initialise gradient kernel + _gradient_kernel.configure(input, &_mag, &_phase, phase_type, border_mode, constant_border_value); + + // Configure NETensor for the HOG space and orientation binning kernel + for(size_t i = 0; i < _num_orient_bin_kernel; ++i) + { + const size_t idx_multi_hog = input_orient_bin[i]; + + // Get the corresponding cell size and number of bins + const Size2D &cell = multi_hog->model(idx_multi_hog)->info()->cell_size(); + const size_t num_bins = multi_hog->model(idx_multi_hog)->info()->num_bins(); + + // Calculate number of cells along the x and y directions for the hog_space + const size_t num_cells_x = width / cell.width; + const size_t num_cells_y = height / cell.height; + + // TensorShape of hog space + TensorShape shape_hog_space = input->info()->tensor_shape(); + shape_hog_space.set(Window::DimX, num_cells_x); + shape_hog_space.set(Window::DimY, num_cells_y); + + // Allocate HOG space + TensorInfo info_space(shape_hog_space, num_bins, DataType::F32); + _hog_space[i].allocator()->init(info_space); + + // Initialise orientation binning kernel + _orient_bin_kernel[i].configure(&_mag, &_phase, _hog_space.get() + i, multi_hog->model(idx_multi_hog)->info()); + } + + // Configure NETensor for the normalized HOG space and block normalization kernel + for(size_t i = 0; i < _num_block_norm_kernel; ++i) + { + const size_t idx_multi_hog = input_block_norm[i].first; + const size_t idx_orient_bin = input_block_norm[i].second; + + // Allocate normalized HOG space + TensorInfo tensor_info(*(multi_hog->model(idx_multi_hog)->info()), width, height); + _hog_norm_space[i].allocator()->init(tensor_info); + + // Initialize block normalization kernel + _block_norm_kernel[i].configure(_hog_space.get() + idx_orient_bin, 
_hog_norm_space.get() + i, multi_hog->model(idx_multi_hog)->info()); + } + + // Configure HOG detector kernel + for(size_t i = 0; i < _num_hog_detect_kernel; ++i) + { + const size_t idx_block_norm = input_hog_detect[i]; + + _hog_detect_kernel[i].configure(_hog_norm_space.get() + idx_block_norm, multi_hog->model(i), detection_windows, detection_window_strides->at(i), threshold, i); + } + + // Configure non maxima suppression kernel + _non_maxima_kernel->configure(_detection_windows, min_distance); + + // Allocate intermediate tensors + _mag.allocator()->allocate(); + _phase.allocator()->allocate(); + + for(size_t i = 0; i < _num_orient_bin_kernel; ++i) + { + _hog_space[i].allocator()->allocate(); + } + + for(size_t i = 0; i < _num_block_norm_kernel; ++i) + { + _hog_norm_space[i].allocator()->allocate(); + } +} + +void NEHOGMultiDetection::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_detection_windows == nullptr, "Unconfigured function"); + + // Reset detection window + _detection_windows->clear(); + + // Run gradient + _gradient_kernel.run(); + + // Run orientation binning kernel + for(size_t i = 0; i < _num_orient_bin_kernel; ++i) + { + NEScheduler::get().schedule(_orient_bin_kernel.get() + i, Window::DimY); + } + + // Run block normalization kernel + for(size_t i = 0; i < _num_block_norm_kernel; ++i) + { + NEScheduler::get().schedule(_block_norm_kernel.get() + i, Window::DimY); + } + + // Run HOG detector kernel + for(size_t i = 0; i < _num_hog_detect_kernel; ++i) + { + _hog_detect_kernel[i].run(); + } + + // Run non-maxima suppression kernel if enabled + if(_non_maxima_suppression) + { + _non_maxima_kernel->run(_non_maxima_kernel->window()); + } +} diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp new file mode 100644 index 0000000000..b54fb67ab7 --- /dev/null +++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/Array.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" +#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" +#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include +#include + +using namespace arm_compute; + +NEHarrisCorners::NEHarrisCorners() + : _sobel(), _harris_score(), _non_max_suppr(), _candidates(), _sort_euclidean(), _border_gx(), _border_gy(), _gx(), _gy(), _score(), _nonmax(), _corners_list(), _num_corner_candidates(0) +{ +} + +void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist, + float sensitivity, int32_t gradient_size, int32_t block_size, KeyPointArray *corners, + BorderMode border_mode, uint8_t constant_border_value, bool use_fp16) +{ + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7)); + + const TensorShape shape = input->info()->tensor_shape(); + TensorInfo tensor_info_gxgy; + + if(gradient_size < 7) + { + tensor_info_gxgy.init(shape, Format::S16); + } + else + { + tensor_info_gxgy.init(shape, Format::S32); + } + + _gx.allocator()->init(tensor_info_gxgy); + _gy.allocator()->init(tensor_info_gxgy); + + TensorInfo tensor_info_score(shape, Format::F32); + _score.allocator()->init(tensor_info_score); + _nonmax.allocator()->init(tensor_info_score); + + _corners_list = arm_compute::cpp14::make_unique(shape.x() * shape.y()); + + // Set/init Sobel kernel accordingly with gradient_size + switch(gradient_size) + { + case 3: + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + break; + } + case 5: + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + break; + } + case 7: + { + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, &_gx, &_gy, border_mode, constant_border_value); + _sobel = std::move(k); + break; + } + default: + ARM_COMPUTE_ERROR("Gradient size not implemented"); + } + + // Normalization factor + const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size); + + if(use_fp16) + { + switch(block_size) + { + case 3: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + break; + case 5: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + break; + case 7: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + default: + break; + } + } + else + { + // Set/init Harris Score kernel accordingly with block_size + switch(block_size) + { 
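+            // Each supported block size (3, 5 or 7) selects its own Harris score kernel variant; all of them are configured with the same arguments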
+ case 3: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + break; + case 5: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + break; + case 7: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + _harris_score = std::move(k); + } + default: + break; + } + } + + // Configure border filling before harris score + _border_gx.configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value); + _border_gy.configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value); + + // Init non-maxima suppression function + _non_max_suppr.configure(&_score, &_nonmax, border_mode); + + // Init corner candidates kernel + _candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates); + + // Init euclidean distance + _sort_euclidean.configure(_corners_list.get(), corners, &_num_corner_candidates, min_dist); + + // Allocate once all the configure methods have been called + _gx.allocator()->allocate(); + _gy.allocator()->allocate(); + _score.allocator()->allocate(); + _nonmax.allocator()->allocate(); +} + +void NEHarrisCorners::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function"); + + // Init to 0 number of corner candidates + _num_corner_candidates = 0; + + // Run Sobel kernel + _sobel->run(); + + // Fill border before harris score kernel + _border_gx.run(_border_gx.window()); + _border_gy.run(_border_gy.window()); + + // Run harris score kernel + NEScheduler::get().schedule(_harris_score.get(), Window::DimY); + + // Run non-maxima suppression + _non_max_suppr.run(); + + // Run corner candidate kernel + NEScheduler::get().schedule(&_candidates, Window::DimY); + + // Run sort & euclidean distance + _sort_euclidean.run(_sort_euclidean.window()); +} diff --git a/src/runtime/NEON/functions/NEHistogram.cpp b/src/runtime/NEON/functions/NEHistogram.cpp new file mode 100644 index 0000000000..c42b2a56e0 --- /dev/null +++ b/src/runtime/NEON/functions/NEHistogram.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEHistogram.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IDistribution1D.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEHistogram::NEHistogram() + : _histogram_kernel(), _local_hist(), _window_lut(arm_compute::cpp14::make_unique(window_lut_default_size)), _local_hist_size(0) +{ +} + +void NEHistogram::configure(const IImage *input, IDistribution1D *output) +{ + ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + // Allocate space for threads local histograms + _local_hist_size = output->num_bins() * NEScheduler::get().num_threads(); + _local_hist = arm_compute::cpp14::make_unique(_local_hist_size); + + // Configure kernel + _histogram_kernel.configure(input, output, _local_hist.get(), _window_lut.get()); +} + +void NEHistogram::run() +{ + // Calculate histogram of input. + NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp new file mode 100644 index 0000000000..af604e9295 --- /dev/null +++ b/src/runtime/NEON/functions/NEIntegralImage.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" +#include "arm_compute/core/Types.h" + +#include + +using namespace arm_compute; + +void NEIntegralImage::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); + _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, 0); +} diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp new file mode 100644 index 0000000000..8232c79f2d --- /dev/null +++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" +#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" +#include "arm_compute/runtime/Tensor.h" + +using namespace arm_compute; + +NELaplacianPyramid::NELaplacianPyramid() + : _num_levels(0), _gaussian_pyr_function(), _convf(), _subf(), _gauss_pyr(), _conv_pyr(), _depth_function() +{ +} + +void NELaplacianPyramid::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(0 == _num_levels, "Unconfigured function"); + + // Compute Gaussian Pyramid + _gaussian_pyr_function.run(); + + for(unsigned int i = 0; i < _num_levels; ++i) + { + // Apply Gaussian filter to gaussian pyramid image + _convf[i].run(); + } + + for(unsigned int i = 0; i < _num_levels; ++i) + { + // Compute laplacian image + _subf[i].run(); + } + + _depth_function.run(); +} + +void NELaplacianPyramid::configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON(0 == pyramid->info()->num_levels()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1)); + + _num_levels = pyramid->info()->num_levels(); + + // Create and initialize the gaussian pyramid and the convoluted pyramid + PyramidInfo pyramid_info; + pyramid_info.init(_num_levels, 0.5f, pyramid->info()->tensor_shape(), arm_compute::Format::U8); + + _gauss_pyr.init(pyramid_info); + _conv_pyr.init(pyramid_info); + + // Create Gaussian Pyramid function + 
_gaussian_pyr_function.configure(input, &_gauss_pyr, border_mode, constant_border_value);
+
+    _convf = arm_compute::cpp14::make_unique<NEGaussian5x5[]>(_num_levels);
+    _subf  = arm_compute::cpp14::make_unique<NEArithmeticSubtraction[]>(_num_levels);
+
+    for(unsigned int i = 0; i < _num_levels; ++i)
+    {
+        _convf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), border_mode, constant_border_value);
+        _subf[i].configure(_gauss_pyr.get_pyramid_level(i), _conv_pyr.get_pyramid_level(i), pyramid->get_pyramid_level(i), ConvertPolicy::WRAP);
+    }
+
+    _depth_function.configure(_conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0);
+
+    _gauss_pyr.allocate();
+    _conv_pyr.allocate();
+}
diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
new file mode 100644
index 0000000000..36ac4a74d1
--- /dev/null
+++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IPyramid.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include + +using namespace arm_compute; + +NELaplacianReconstruct::NELaplacianReconstruct() + : _tmp_pyr(), _addf(), _scalef(), _depthf() +{ +} + +void NELaplacianReconstruct::configure(const IPyramid *pyramid, const ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == pyramid); + ARM_COMPUTE_ERROR_ON(input == output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(output->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions()); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(0)->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(0)->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1)); + + const size_t num_levels = pyramid->info()->num_levels(); + + // Create and initialize the tmp pyramid: I(n-2) = upsample( input + Laplace(n-1) ) + PyramidInfo pyramid_info; + pyramid_info.init(num_levels, 0.5f, output->info()->tensor_shape(), arm_compute::Format::S16); + + _tmp_pyr.init(pyramid_info); + + // Allocate add and scale functions. Level 0 does not need to be scaled. + _addf = arm_compute::cpp14::make_unique(num_levels); + _scalef = arm_compute::cpp14::make_unique(num_levels - 1); + + const size_t last_level = num_levels - 1; + + _addf[last_level].configure(input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE); + + // Scale levels n-1 to 1, and add levels n-2 to 0 + for(size_t l = 0; l < last_level; ++l) + { + _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value); + _addf[l].configure(_tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE); + } + + // Convert level 0 from S16 to U8 + _depthf.configure(_tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0); + + _tmp_pyr.allocate(); +} + +void NELaplacianReconstruct::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_addf == nullptr, "Unconfigured function"); + + const size_t last_level = _tmp_pyr.info()->num_levels() - 1; + + _addf[last_level].run(); + + // Run l = [last_level - 1, 0] + for(size_t l = last_level; l-- > 0;) + { + _scalef[l].run(); + _addf[l].run(); + } + + _depthf.run(); +} diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp new file mode 100644 index 0000000000..85d7ba3650 --- /dev/null +++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include +#include + +using namespace arm_compute; + +NELocallyConnectedLayer::NELocallyConnectedLayer() + : _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(), _is_first_run(false) +{ +} + +void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 2); + } + + bool _has_bias = (biases != nullptr); + _is_first_run = true; + + // Get parameters for conv_info + unsigned int stride_x = 0; + unsigned int stride_y = 0; + unsigned int pad_x = 0; + unsigned int pad_y = 0; + std::tie(stride_x, stride_y) = conv_info.stride(); + std::tie(pad_x, pad_y) = conv_info.pad(); + + // Get convolved dimensions + unsigned int conv_w = 0; + unsigned int conv_h = 0; + std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), + stride_x, stride_y, pad_x, pad_y, conv_info.round()); + + ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); + ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one"); + + // Create tensor to store the reshaped weights + const size_t mat_weights_cols = weights->info()->dimension(3); + const size_t 
mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + ((_has_bias) ? 1 : 0); + const size_t mat_weights_num = weights->info()->dimension(4); + + const TensorShape shape_wr(mat_weights_cols, mat_weights_rows, mat_weights_num); + + _weights_reshaped.allocator()->init(TensorInfo(shape_wr, 1, weights->info()->data_type())); + + // Create tensor to store im2col reshaped inputs + const size_t mat_input_cols = mat_weights_rows; + const size_t mat_input_rows = conv_w * conv_h; + TensorShape shape_im2col = input->info()->tensor_shape(); + shape_im2col.set(0, mat_input_cols); + shape_im2col.set(1, mat_input_rows); + shape_im2col.set(2, 1); + + _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, input->info()->data_type())); + + // Create locally connected layer output tensor + TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape(); + shape_gemm.set(0, mat_weights_cols); + shape_gemm.set(1, mat_input_rows); + _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); + + // Configure kernels + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); + _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); + _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); + + // Allocate intermediate tensors + _weights_reshaped.allocator()->allocate(); + _input_im2col_reshaped.allocator()->allocate(); + _gemm_output.allocator()->allocate(); +} + +void NELocallyConnectedLayer::run() +{ + // Run weights reshaping (Runs once for every configure) + if(_is_first_run) + { + _is_first_run = false; + NEScheduler::get().schedule(&_weights_reshape_kernel, 3); + } + + // Run input reshaping + NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY); + + // Runs GEMM on reshaped matrices + NEScheduler::get().schedule(&_mm_kernel, Window::DimX); + + // Reshape output matrix + NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEMagnitude.cpp b/src/runtime/NEON/functions/NEMagnitude.cpp new file mode 100644 index 0000000000..9390ca2b6a --- /dev/null +++ b/src/runtime/NEON/functions/NEMagnitude.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEMagnitude.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" +#include "arm_compute/core/Types.h" + +#include + +using namespace arm_compute; + +void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, bool use_fp16) +{ + if(use_fp16) + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input1, input2, output, nullptr); + _kernel = std::move(k); + } + else + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input1, input2, output, nullptr); + _kernel = std::move(k); + } +} diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp new file mode 100644 index 0000000000..47143f5e5b --- /dev/null +++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h" + +#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEMeanStdDev::NEMeanStdDev() + : _mean_stddev_kernel(), _global_sum(0), _global_sum_squared(0) +{ +} + +void NEMeanStdDev::configure(const IImage *input, float *mean, float *stddev) +{ + _mean_stddev_kernel.configure(input, mean, &_global_sum, stddev, &_global_sum_squared); +} + +void NEMeanStdDev::run() +{ + _global_sum = 0; + _global_sum_squared = 0; + + NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEMedian3x3.cpp b/src/runtime/NEON/functions/NEMedian3x3.cpp new file mode 100644 index 0000000000..aa7cc97081 --- /dev/null +++ b/src/runtime/NEON/functions/NEMedian3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    auto k = arm_compute::cpp14::make_unique<NEMedian3x3Kernel>();
+    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    _kernel = std::move(k);
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NEMinMaxLocation.cpp b/src/runtime/NEON/functions/NEMinMaxLocation.cpp
new file mode 100644
index 0000000000..cab9200cf8
--- /dev/null
+++ b/src/runtime/NEON/functions/NEMinMaxLocation.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NEMinMaxLocation::NEMinMaxLocation() + : _min_max(), _min_max_loc() +{ +} + +void NEMinMaxLocation::configure(const IImage *input, int32_t *min, int32_t *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count) +{ + _min_max.configure(input, min, max); + _min_max_loc.configure(input, min, max, min_loc, max_loc, min_count, max_count); +} + +void NEMinMaxLocation::run() +{ + _min_max.reset(); + + /* Run min max kernel */ + NEScheduler::get().schedule(&_min_max, Window::DimY); + + /* Run min max location */ + NEScheduler::get().schedule(&_min_max_loc, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NENonLinearFilter.cpp b/src/runtime/NEON/functions/NENonLinearFilter.cpp new file mode 100644 index 0000000000..01aea3b671 --- /dev/null +++ b/src/runtime/NEON/functions/NENonLinearFilter.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, + BorderMode border_mode, + uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp new file mode 100644 index 0000000000..a7b3759a45 --- /dev/null +++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode)
+{
+    auto k = arm_compute::cpp14::make_unique<NENonMaximaSuppression3x3Kernel>();
+    k->configure(input, output, border_mode == BorderMode::UNDEFINED);
+    _kernel = std::move(k);
+
+    if(border_mode != BorderMode::UNDEFINED)
+    {
+        _border_handler.configure(input, 1, BorderMode::CONSTANT, 0);
+    }
+    else
+    {
+        _border_handler.configure(input, 1, BorderMode::UNDEFINED, 0);
+    }
+}
diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp
new file mode 100644
index 0000000000..69ff32591f
--- /dev/null
+++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +using namespace arm_compute; + +NENormalizationLayer::NENormalizationLayer() + : _norm_kernel(), _multiply_kernel(), _border_handler(), _input_squared() +{ +} + +void NENormalizationLayer::configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info) +{ + ARM_COMPUTE_ERROR_ON(input == nullptr); + + TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + _input_squared.allocator()->init(tensor_info); + + // Configure kernels + _norm_kernel.configure(input, &_input_squared, output, norm_info); + _multiply_kernel.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO); + _border_handler.configure(&_input_squared, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0.0f)); + + // Allocate the tensor once the configure methods have been called + _input_squared.allocator()->allocate(); +} + +void NENormalizationLayer::run() +{ + NEScheduler::get().schedule(&_multiply_kernel, Window::DimY); + NEScheduler::get().schedule(&_border_handler, Window::DimY); + NEScheduler::get().schedule(&_norm_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp new file mode 100644 index 0000000000..49135e442c --- /dev/null +++ b/src/runtime/NEON/functions/NEOpticalFlow.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" +#include "arm_compute/runtime/Pyramid.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +NEOpticalFlow::NEOpticalFlow() + : _func_scharr(), _kernel_tracker(), _scharr_gx(), _scharr_gy(), _new_points(nullptr), _new_points_estimates(nullptr), _old_points(nullptr), _new_points_internal(), _old_points_internal(), + _num_levels(0) +{ +} + +void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, + IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension, + bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == old_pyramid); + ARM_COMPUTE_ERROR_ON(nullptr == new_pyramid); + ARM_COMPUTE_ERROR_ON(nullptr == old_points); + ARM_COMPUTE_ERROR_ON(nullptr == new_points_estimates); + ARM_COMPUTE_ERROR_ON(nullptr == new_points); + ARM_COMPUTE_ERROR_ON(old_pyramid->info()->num_levels() != new_pyramid->info()->num_levels()); + ARM_COMPUTE_ERROR_ON(0 == old_pyramid->info()->num_levels()); + ARM_COMPUTE_ERROR_ON(old_pyramid->info()->width() != new_pyramid->info()->width()); + ARM_COMPUTE_ERROR_ON(old_pyramid->info()->height() != new_pyramid->info()->height()); + ARM_COMPUTE_ERROR_ON(use_initial_estimate && old_points->num_values() != new_points_estimates->num_values()); + + _num_levels = old_pyramid->info()->num_levels(); + _old_points = old_points; + _new_points = new_points; + _new_points_estimates = new_points_estimates; + + const float pyr_scale = old_pyramid->info()->scale(); + + _func_scharr = arm_compute::cpp14::make_unique(_num_levels); + _kernel_tracker = arm_compute::cpp14::make_unique(_num_levels); + _scharr_gx = arm_compute::cpp14::make_unique(_num_levels); + _scharr_gy = arm_compute::cpp14::make_unique(_num_levels); + + _old_points_internal = LKInternalKeypointArray(old_points->num_values()); + _new_points_internal = LKInternalKeypointArray(old_points->num_values()); + _new_points->resize(old_points->num_values()); + + for(unsigned int i = 0; i < _num_levels; ++i) + { + // Get images from the ith level of old and right pyramid + IImage *old_ith_input = old_pyramid->get_pyramid_level(i); + IImage *new_ith_input = new_pyramid->get_pyramid_level(i); + + // Get width and height of images + const unsigned int width_ith = old_ith_input->info()->dimension(0); + const unsigned int height_ith = new_ith_input->info()->dimension(1); + + TensorInfo tensor_info(TensorShape(width_ith, height_ith), Format::S16); + + _scharr_gx[i].allocator()->init(tensor_info); + _scharr_gy[i].allocator()->init(tensor_info); + + // Init Scharr kernel + _func_scharr[i].configure(old_ith_input, _scharr_gx.get() + i, _scharr_gy.get() + i, border_mode, constant_border_value); + + // Init Lucas-Kanade kernel + _kernel_tracker[i].configure(old_ith_input, new_ith_input, _scharr_gx.get() + i, _scharr_gy.get() + i, + old_points, new_points_estimates, new_points, + &_old_points_internal, 
&_new_points_internal, + termination, use_initial_estimate, epsilon, num_iterations, window_dimension, + i, _num_levels, pyr_scale); + + _scharr_gx[i].allocator()->allocate(); + _scharr_gy[i].allocator()->allocate(); + } +} + +void NEOpticalFlow::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(_num_levels == 0, "Unconfigured function"); + + for(unsigned int level = _num_levels; level > 0; --level) + { + // Run Scharr kernel + _func_scharr[level - 1].run(); + + // Run Lucas-Kanade kernel + NEScheduler::get().schedule(_kernel_tracker.get() + level - 1, Window::DimX); + } +} diff --git a/src/runtime/NEON/functions/NEPhase.cpp b/src/runtime/NEON/functions/NEPhase.cpp new file mode 100644 index 0000000000..7683f461d3 --- /dev/null +++ b/src/runtime/NEON/functions/NEPhase.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEPhase.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" + +#include + +using namespace arm_compute; + +void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input1, input2, nullptr, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp new file mode 100644 index 0000000000..056d33b370 --- /dev/null +++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" + +#include + +using namespace arm_compute; + +void NEPixelWiseMultiplication::configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input1, input2, output, scale, overflow_policy, rounding_policy); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp new file mode 100644 index 0000000000..6f0cc4f160 --- /dev/null +++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" + +using namespace arm_compute; + +void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info) +{ + // Configure pooling kernel + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, pool_info); + _kernel = std::move(k); + + // Configure border depending on operation required + BorderMode border_mode = (pool_info.pool_type() == PoolingType::MAX) ? BorderMode::REPLICATE : BorderMode::CONSTANT; + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0)); +} diff --git a/src/runtime/NEON/functions/NERemap.cpp b/src/runtime/NEON/functions/NERemap.cpp new file mode 100644 index 0000000000..9f06fb699c --- /dev/null +++ b/src/runtime/NEON/functions/NERemap.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
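// Usage sketch (illustrative only): 2x2 max pooling with stride 2 through the
// NEPoolingLayer shown above. The shapes and the PoolingLayerInfo/PadStrideInfo arguments
// are assumptions for the example; as the code above shows, MAX pooling fills the border
// with replicated values while other pooling types use a constant border.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_pooling_layer()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 4U), 1, DataType::F32, 0));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 4U), 1, DataType::F32, 0));

    NEPoolingLayer pool;
    pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src ...
    pool.run();
}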
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NERemap.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NERemapKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include + +using namespace arm_compute; + +void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported"); + + auto k = arm_compute::cpp14::make_unique(); + + k->configure(input, map_x, map_y, output, policy); + + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp new file mode 100644 index 0000000000..b70f626df0 --- /dev/null +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEScale.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include +#include +#include + +using namespace arm_compute; + +namespace +{ +void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size) +{ + ARM_COMPUTE_ERROR_ON(nullptr == offsets); + + Window win; + win.set(Window::DimX, Window::Dimension(0, offsets->info()->dimension(0), 1)); + win.set(Window::DimY, Window::Dimension(0, offsets->info()->dimension(1), 1)); + + if(dx != nullptr && dy != nullptr) + { + // Pre-compute the offset and pixel's distance for BILINEAR interpolation + Iterator offsets_it(offsets, win); + Iterator dx_it(dx, win); + Iterator dy_it(dy, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const float in_x = (id.x() + 0.5f) * wr - 0.5f; + const float in_y = (id.y() + 0.5f) * hr - 0.5f; + const int in_xi = std::floor(in_x); + const int in_yi = std::floor(in_y); + + *reinterpret_cast(offsets_it.ptr()) = in_xi * input_element_size; + *reinterpret_cast(dx_it.ptr()) = in_x - in_xi; + *reinterpret_cast(dy_it.ptr()) = in_y - in_yi; + }, + offsets_it, dx_it, dy_it); + } + else + { + // Pre-compute the offset for NEAREST interpolation + Iterator offsets_it(offsets, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const size_t in_xi = (id.x() + 0.5f) * wr; + + *reinterpret_cast(offsets_it.ptr()) = in_xi * input_element_size; + }, + offsets_it); + } +} +} // namespace + +NEScale::NEScale() + : _offsets(), _dx(), _dy() +{ +} + +void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); + } + + // Get the tensor shape + const TensorShape shape(output->info()->dimension(0), output->info()->dimension(1)); + + // Compute the ratio between source width/height and destination width/height + const auto wr = static_cast(input->info()->dimension(0)) / static_cast(output->info()->dimension(0)); + const auto hr = static_cast(input->info()->dimension(1)) / static_cast(output->info()->dimension(1)); + + // Get the element size of the input image + const size_t input_element_size = input->info()->element_size(); + + // Area interpolation behaves as Nearest Neighbour in case of up-sampling + if(policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f) + { + policy = InterpolationPolicy::NEAREST_NEIGHBOR; + } + + auto k = arm_compute::cpp14::make_unique(); + + // Check if the border mode is UNDEFINED + const bool border_undefined = border_mode == BorderMode::UNDEFINED; + + switch(policy) + { + case InterpolationPolicy::NEAREST_NEIGHBOR: + { + TensorInfo 
tensor_info_offsets(shape, Format::S32); + _offsets.allocator()->init(tensor_info_offsets); + + k->configure(input, nullptr, nullptr, &_offsets, output, policy, border_undefined); + + // Allocate once the configure methods have been called + _offsets.allocator()->allocate(); + + // Pre-compute offsets for nearest interpolation + precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size); + break; + } + case InterpolationPolicy::BILINEAR: + { + TensorInfo tensor_info_offsets(shape, Format::S32); + TensorInfo tensor_info_dxdy(shape, Format::F32); + + _offsets.allocator()->init(tensor_info_offsets); + _dx.allocator()->init(tensor_info_dxdy); + _dy.allocator()->init(tensor_info_dxdy); + + k->configure(input, &_dx, &_dy, &_offsets, output, policy, border_undefined); + + // Allocate once the configure methods have been called + _offsets.allocator()->allocate(); + _dx.allocator()->allocate(); + _dy.allocator()->allocate(); + + // Pre-compute dx, dy and offsets for bilinear interpolation + precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size); + break; + } + case InterpolationPolicy::AREA: + { + k->configure(input, nullptr, nullptr, nullptr, output, policy, border_undefined); + break; + } + default: + ARM_COMPUTE_ERROR("Unsupported interpolation mode"); + } + + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NEScharr3x3.cpp b/src/runtime/NEON/functions/NEScharr3x3.cpp new file mode 100644 index 0000000000..04b3f14ce7 --- /dev/null +++ b/src/runtime/NEON/functions/NEScharr3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NEScharr3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NESobel3x3.cpp b/src/runtime/NEON/functions/NESobel3x3.cpp new file mode 100644 index 0000000000..3b46fd78c1 --- /dev/null +++ b/src/runtime/NEON/functions/NESobel3x3.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NESobel3x3.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" +#include "arm_compute/core/PixelValue.h" + +#include + +using namespace arm_compute; + +void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _kernel = std::move(k); + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); +} diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp new file mode 100644 index 0000000000..8967a22ba1 --- /dev/null +++ b/src/runtime/NEON/functions/NESobel5x5.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NESobel5x5.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +NESobel5x5::NESobel5x5() + : _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() +{ +} + +void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + + const bool run_sobel_x = output_x != nullptr; + const bool run_sobel_y = output_y != nullptr; + + TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16); + + if(run_sobel_x && run_sobel_y) + { + _tmp_x.allocator()->init(tensor_info); + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + _tmp_y.allocator()->allocate(); + } + else if(run_sobel_x) + { + _tmp_x.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + } + else if(run_sobel_y) + { + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_y.allocator()->allocate(); + } + + _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); +} + +void NESobel5x5::run() +{ + _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_sobel_hor, Window::DimY); + NEScheduler::get().schedule(&_sobel_vert, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp new file mode 100644 index 0000000000..f628da9709 --- /dev/null +++ b/src/runtime/NEON/functions/NESobel7x7.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
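// Usage sketch (illustrative only): computing both 5x5 Sobel gradients of a U8 image with
// the NESobel5x5 function shown above. The image size and border mode are assumptions; as
// in the implementation, the horizontal pass writes S16 temporaries and the vertical pass
// produces the final S16 gradient images.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_sobel5x5()
{
    Tensor src, gx, gy;
    src.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::U8));
    gx.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::S16));
    gy.allocator()->init(TensorInfo(TensorShape(640U, 480U), Format::S16));

    NESobel5x5 sobel;
    sobel.configure(&src, &gx, &gy, BorderMode::UNDEFINED, 0);

    src.allocator()->allocate();
    gx.allocator()->allocate();
    gy.allocator()->allocate();

    // ... fill src ...
    sobel.run();
}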
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NESobel7x7.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/TensorAllocator.h" + +using namespace arm_compute; + +NESobel7x7::NESobel7x7() + : _sobel_hor(), _sobel_vert(), _tmp_x(), _tmp_y(), _border_handler() +{ +} + +void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + + const bool run_sobel_x = output_x != nullptr; + const bool run_sobel_y = output_y != nullptr; + + TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32); + + if(run_sobel_x && run_sobel_y) + { + _tmp_x.allocator()->init(tensor_info); + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + _tmp_y.allocator()->allocate(); + } + else if(run_sobel_x) + { + _tmp_x.allocator()->init(tensor_info); + _sobel_hor.configure(input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED); + _tmp_x.allocator()->allocate(); + } + else if(run_sobel_y) + { + _tmp_y.allocator()->init(tensor_info); + _sobel_hor.configure(input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED); + _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED); + _tmp_y.allocator()->allocate(); + } + + _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value)); +} + +void NESobel7x7::run() +{ + _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_sobel_hor, Window::DimY); + NEScheduler::get().schedule(&_sobel_vert, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp new file mode 100644 index 0000000000..0651eab1bc --- /dev/null +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include + +using namespace arm_compute; + +NESoftmaxLayer::NESoftmaxLayer() + : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _fill_border_kernel(), _max(), _sum(), _tmp() +{ +} + +void NESoftmaxLayer::configure(ITensor *input, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32); + + // Create intermediate tensors shapes + TensorInfo tensor_info_tmp(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()); + _tmp.allocator()->init(tensor_info_tmp); + + TensorShape shape = input->info()->tensor_shape(); + shape.set(0, 1); + TensorInfo tensor_info_max_sum(shape, input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()); + _max.allocator()->init(tensor_info_max_sum); + _sum.allocator()->init(tensor_info_max_sum); + + // Configure Kernels + _max_kernel.configure(input, &_max); + _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); + _norm_kernel.configure(&_tmp, &_sum, output); + _fill_border_kernel.configure(input, _max_kernel.border_size(), BorderMode::CONSTANT, PixelValue(-FLT_MAX)); + + // Allocate intermediate tensors + _tmp.allocator()->allocate(); + _max.allocator()->allocate(); + _sum.allocator()->allocate(); +} + +void NESoftmaxLayer::run() +{ + NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); + NEScheduler::get().schedule(&_max_kernel, Window::DimY); + NEScheduler::get().schedule(&_shift_exp_sum_kernel, Window::DimY); + NEScheduler::get().schedule(&_norm_kernel, Window::DimY); +} diff --git a/src/runtime/NEON/functions/NETableLookup.cpp b/src/runtime/NEON/functions/NETableLookup.cpp new file mode 100644 index 0000000000..ebb8a0ac9b --- /dev/null +++ b/src/runtime/NEON/functions/NETableLookup.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
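// Usage sketch (illustrative only): running the NESoftmaxLayer implemented above on a
// batch of F32 vectors. The shape is an assumption; internally the function chains the
// max, shift-exp-sum and normalization kernels, so the caller only configures and runs.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_softmax_layer()
{
    Tensor logits, probs;
    // 1000 classes, batch of 4
    const TensorInfo info(TensorShape(1000U, 4U), 1, DataType::F32, 0);
    logits.allocator()->init(info);
    probs.allocator()->init(info);

    NESoftmaxLayer softmax;
    softmax.configure(&logits, &probs);

    logits.allocator()->allocate();
    probs.allocator()->allocate();

    // ... fill logits ...
    softmax.run();
}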
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NETableLookup.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" + +#include + +using namespace arm_compute; + +void NETableLookup::configure(const ITensor *input, const ILut *lut, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, lut, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp new file mode 100644 index 0000000000..93dc124880 --- /dev/null +++ b/src/runtime/NEON/functions/NEThreshold.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEThreshold.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" + +#include + +using namespace arm_compute; + +void NEThreshold::configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output, threshold, false_value, true_value, type, upper); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp new file mode 100644 index 0000000000..53ac9c5ee3 --- /dev/null +++ b/src/runtime/NEON/functions/NETranspose.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NETranspose.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" + +#include + +using namespace arm_compute; + +void NETranspose::configure(const ITensor *input, ITensor *output) +{ + auto k = arm_compute::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEWarpAffine.cpp b/src/runtime/NEON/functions/NEWarpAffine.cpp new file mode 100644 index 0000000000..24fb16f9e3 --- /dev/null +++ b/src/runtime/NEON/functions/NEWarpAffine.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" + +#include + +using namespace arm_compute; + +void NEWarpAffine::configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == matrix); + + switch(policy) + { + case InterpolationPolicy::NEAREST_NEIGHBOR: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input, output, matrix, border_mode, constant_border_value); + _kernel = std::move(k); + break; + } + case InterpolationPolicy::BILINEAR: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input, output, matrix, border_mode, constant_border_value); + _kernel = std::move(k); + break; + } + case InterpolationPolicy::AREA: + default: + ARM_COMPUTE_ERROR("Interpolation type not supported"); + } + + _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value); +} diff --git a/src/runtime/NEON/functions/NEWarpPerspective.cpp b/src/runtime/NEON/functions/NEWarpPerspective.cpp new file mode 100644 index 0000000000..84b2df5bfa --- /dev/null +++ b/src/runtime/NEON/functions/NEWarpPerspective.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" + +#include + +using namespace arm_compute; + +void NEWarpPerspective::configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON(nullptr == matrix); + + switch(policy) + { + case InterpolationPolicy::NEAREST_NEIGHBOR: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input, output, matrix, border_mode, constant_border_value); + _kernel = std::move(k); + break; + } + case InterpolationPolicy::BILINEAR: + { + auto k = arm_compute::cpp14::make_unique>(); + k->configure(input, output, matrix, border_mode, constant_border_value); + _kernel = std::move(k); + break; + } + case InterpolationPolicy::AREA: + default: + ARM_COMPUTE_ERROR("Interpolation type not supported"); + } + + _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value); +} diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp new file mode 100644 index 0000000000..0cced73276 --- /dev/null +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/OMP/OMPScheduler.h" + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" + +#include + +using namespace arm_compute; + +OMPScheduler &OMPScheduler::get() +{ + static OMPScheduler scheduler; + return scheduler; +} + +OMPScheduler::OMPScheduler() + : _num_threads(omp_get_max_threads()) +{ +} + +unsigned int OMPScheduler::num_threads() const +{ + return _num_threads; +} + +void OMPScheduler::set_num_threads(unsigned int num_threads) +{ + const unsigned int num_cores = omp_get_max_threads(); + _num_threads = num_threads == 0 ? 
num_cores : num_threads; +} + +void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +{ + ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); + + const Window &max_window = kernel->window(); + const unsigned int num_iterations = max_window.num_iterations(split_dimension); + const unsigned int num_threads = std::min(num_iterations, _num_threads); + + if(!kernel->is_parallelisable() || 1 == num_threads) + { + kernel->run(max_window); + } + else + { + #pragma omp parallel num_threads(num_threads) + { + #pragma omp for + for(unsigned int t = 0; t < num_threads; ++t) + { + Window win = max_window.split_window(split_dimension, t, num_threads); + win.set_thread_id(t); + win.set_num_threads(num_threads); + kernel->run(win); + } + } + } +} diff --git a/src/runtime/Pyramid.cpp b/src/runtime/Pyramid.cpp new file mode 100644 index 0000000000..f1b6c93b50 --- /dev/null +++ b/src/runtime/Pyramid.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
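// Illustrative sketch (not ACL code): the work-splitting pattern used by
// OMPScheduler::schedule() above, reduced to a standalone OpenMP program. The iteration
// count and the slice arithmetic are assumptions made for the example; in the scheduler
// the actual sub-ranges come from Window::split_window(). Compile with -fopenmp.
#include <algorithm>
#include <cstdio>
#include <omp.h>

int main()
{
    const unsigned int num_iterations = 37;
    // Clamp the team size to the amount of available work, as the scheduler does
    const unsigned int num_threads = std::min(num_iterations, static_cast<unsigned int>(omp_get_max_threads()));

    #pragma omp parallel num_threads(num_threads)
    {
        #pragma omp for
        for(unsigned int t = 0; t < num_threads; ++t)
        {
            // Split [0, num_iterations) into num_threads roughly equal slices
            const unsigned int start = (num_iterations * t) / num_threads;
            const unsigned int end   = (num_iterations * (t + 1)) / num_threads;
            std::printf("thread %u processes iterations [%u, %u)\n", t, start, end);
        }
    }
    return 0;
}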
+ */ +#include "arm_compute/runtime/Pyramid.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PyramidInfo.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" + +#include + +using namespace arm_compute; + +void Pyramid::init(const PyramidInfo &info) +{ + internal_init(info, false); +} + +void Pyramid::init_auto_padding(const PyramidInfo &info) +{ + internal_init(info, true); +} + +void Pyramid::internal_init(const PyramidInfo &info, bool auto_padding) +{ + _info = info; + _pyramid = arm_compute::cpp14::make_unique(_info.num_levels()); + + size_t w = _info.width(); + size_t h = _info.height(); + size_t ref_w = w; + size_t ref_h = h; + bool is_orb_scale = (SCALE_PYRAMID_ORB == _info.scale()); + TensorShape tensor_shape = _info.tensor_shape(); + + // Note: Look-up table used by the OpenVX sample implementation + const float c_orbscale[4] = { 0.5f, + SCALE_PYRAMID_ORB, + SCALE_PYRAMID_ORB * SCALE_PYRAMID_ORB, + SCALE_PYRAMID_ORB *SCALE_PYRAMID_ORB * SCALE_PYRAMID_ORB + }; + + for(size_t i = 0; i < _info.num_levels(); ++i) + { + TensorInfo tensor_info(tensor_shape, _info.format()); + + if(auto_padding) + { + tensor_info.auto_padding(); + } + + (_pyramid.get() + i)->allocator()->init(tensor_info); + + if(is_orb_scale) + { + float orb_scale = c_orbscale[(i + 1) % 4]; + w = static_cast(std::ceil(static_cast(ref_w) * orb_scale)); + h = static_cast(std::ceil(static_cast(ref_h) * orb_scale)); + + if(0 == ((i + 1) % 4)) + { + ref_w = w; + ref_h = h; + } + } + else + { + w = (w + 1) * _info.scale(); + h = (h + 1) * _info.scale(); + } + + // Update tensor_shape + tensor_shape.set(0, w); + tensor_shape.set(1, h); + } +} + +void Pyramid::allocate() +{ + ARM_COMPUTE_ERROR_ON(_pyramid == nullptr); + + for(size_t i = 0; i < _info.num_levels(); ++i) + { + (_pyramid.get() + i)->allocator()->allocate(); + } +} + +const PyramidInfo *Pyramid::info() const +{ + return &_info; +} + +Tensor *Pyramid::get_pyramid_level(size_t index) const +{ + ARM_COMPUTE_ERROR_ON(index >= _info.num_levels()); + + return (_pyramid.get() + index); +} diff --git a/src/runtime/Scheduler.cpp b/src/runtime/Scheduler.cpp new file mode 100644 index 0000000000..a131928293 --- /dev/null +++ b/src/runtime/Scheduler.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/Scheduler.h" + +#include "arm_compute/core/Error.h" +#if ARM_COMPUTE_CPP_SCHEDULER +#include "arm_compute/runtime/CPP/CPPScheduler.h" +#endif + +#include "arm_compute/runtime/SingleThreadScheduler.h" + +#if ARM_COMPUTE_OPENMP_SCHEDULER +#include "arm_compute/runtime/OMP/OMPScheduler.h" +#endif + +using namespace arm_compute; + +#if !ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER +Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::OMP; +#elif ARM_COMPUTE_CPP_SCHEDULER && !ARM_COMPUTE_OPENMP_SCHEDULER +Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP; +#elif ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER +Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP; +#else +Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::ST; +#endif + +void Scheduler::set(Type t) +{ + ARM_COMPUTE_ERROR_ON(!Scheduler::is_available(t)); + _scheduler_type = t; +} + +bool Scheduler::is_available(Type t) +{ + switch(t) + { + case Type::ST: + { + return true; + } + case Type::CPP: + { +#if ARM_COMPUTE_CPP_SCHEDULER + return true; +#else + return false; +#endif + } + case Type::OMP: + { +#if ARM_COMPUTE_OPENMP_SCHEDULER + return true; +#else + return false; +#endif + } + case Type::CUSTOM: + { + return _custom_scheduler != nullptr; + } + default: + { + ARM_COMPUTE_ERROR("Invalid Scheduler type"); + return false; + } + } +} + +Scheduler::Type Scheduler::get_type() +{ + return _scheduler_type; +} + +IScheduler &Scheduler::get() +{ + switch(_scheduler_type) + { + case Type::ST: + { + return SingleThreadScheduler::get(); + } + case Type::CPP: + { +#if ARM_COMPUTE_CPP_SCHEDULER + return CPPScheduler::get(); +#else + ARM_COMPUTE_ERROR("Recompile with cppthreads=1 to use C++11 scheduler."); +#endif + break; + } + case Type::OMP: + { +#if ARM_COMPUTE_OPENMP_SCHEDULER + return OMPScheduler::get(); +#else + ARM_COMPUTE_ERROR("Recompile with openmp=1 to use openmp scheduler."); +#endif + break; + } + case Type::CUSTOM: + { + if(_custom_scheduler == nullptr) + { + ARM_COMPUTE_ERROR("No custom scheduler has been setup. Call set(std::shared_ptr &scheduler) before Scheduler::get()"); + } + else + { + return *_custom_scheduler; + } + break; + } + default: + { + ARM_COMPUTE_ERROR("Invalid Scheduler type"); + break; + } + } + return SingleThreadScheduler::get(); +} + +std::shared_ptr Scheduler::_custom_scheduler = nullptr; + +void Scheduler::set(std::shared_ptr &scheduler) +{ + _custom_scheduler = scheduler; + set(Type::CUSTOM); +} diff --git a/src/runtime/SubTensor.cpp b/src/runtime/SubTensor.cpp new file mode 100644 index 0000000000..32924be3dc --- /dev/null +++ b/src/runtime/SubTensor.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/SubTensor.h" + +#include "arm_compute/core/Error.h" + +using namespace arm_compute; + +SubTensor::SubTensor(ITensor *parent, const TensorShape &tensor_shape, const Coordinates &coords) + : _parent(nullptr), _info() +{ + ARM_COMPUTE_ERROR_ON(parent == nullptr); + _info = SubTensorInfo(parent->info(), tensor_shape, coords); + _parent = parent; +} + +ITensorInfo *SubTensor::info() const +{ + return &_info; +} + +ITensorInfo *SubTensor::info() +{ + return &_info; +} + +uint8_t *SubTensor::buffer() const +{ + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->buffer(); +} + +ITensor *SubTensor::parent() +{ + return _parent; +} diff --git a/src/runtime/Tensor.cpp b/src/runtime/Tensor.cpp new file mode 100644 index 0000000000..435068c61d --- /dev/null +++ b/src/runtime/Tensor.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/Tensor.h" + +using namespace arm_compute; + +Tensor::Tensor() + : _allocator() +{ +} + +ITensorInfo *Tensor::info() const +{ + return &_allocator.info(); +} + +ITensorInfo *Tensor::info() +{ + return &_allocator.info(); +} + +uint8_t *Tensor::buffer() const +{ + return _allocator.data(); +} + +TensorAllocator *Tensor::allocator() +{ + return &_allocator; +} diff --git a/src/runtime/TensorAllocator.cpp b/src/runtime/TensorAllocator.cpp new file mode 100644 index 0000000000..5c719c761a --- /dev/null +++ b/src/runtime/TensorAllocator.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
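// Usage sketch (illustrative only): viewing a region of a parent Tensor through the
// SubTensor class defined above. The shapes and coordinates are assumptions; as the
// implementation shows, the sub-tensor shares the parent's buffer and only carries its
// own SubTensorInfo.
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/SubTensor.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_subtensor()
{
    Tensor parent;
    parent.allocator()->init(TensorInfo(TensorShape(64U, 64U), Format::U8));
    parent.allocator()->allocate();

    // 16x16 window whose origin sits at (8, 8) inside the parent
    SubTensor view(&parent, TensorShape(16U, 16U), Coordinates(8, 8));

    // No separate allocation: the view reads and writes the parent's memory
    uint8_t *same_buffer = view.buffer();
    (void)same_buffer;
}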
diff --git a/src/runtime/TensorAllocator.cpp b/src/runtime/TensorAllocator.cpp
new file mode 100644
index 0000000000..5c719c761a
--- /dev/null
+++ b/src/runtime/TensorAllocator.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+
+#include <cstddef>
+
+using namespace arm_compute;
+
+namespace
+{
+bool validate_subtensor_shape(const TensorInfo &parent_info, const TensorInfo &child_info, const Coordinates &coords)
+{
+    bool               is_valid     = true;
+    const TensorShape &parent_shape = parent_info.tensor_shape();
+    const TensorShape &child_shape  = child_info.tensor_shape();
+    const size_t       parent_dims  = parent_info.num_dimensions();
+    const size_t       child_dims   = child_info.num_dimensions();
+
+    if(child_dims <= parent_dims)
+    {
+        for(size_t num_dimensions = child_dims; num_dimensions > 0; --num_dimensions)
+        {
+            const size_t child_dim_size = coords[num_dimensions - 1] + child_shape[num_dimensions - 1];
+
+            if((coords[num_dimensions - 1] < 0) || (child_dim_size > parent_shape[num_dimensions - 1]))
+            {
+                is_valid = false;
+                break;
+            }
+        }
+    }
+    else
+    {
+        is_valid = false;
+    }
+
+    return is_valid;
+}
+} // namespace
+
+TensorAllocator::TensorAllocator()
+    : _buffer(nullptr)
+{
+}
+
+void TensorAllocator::init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo sub_info)
+{
+    // Get parent info
+    const TensorInfo parent_info = allocator.info();
+
+    // Check if coordinates and new shape are within the parent tensor
+    ARM_COMPUTE_ERROR_ON(!validate_subtensor_shape(parent_info, sub_info, coords));
+    ARM_COMPUTE_UNUSED(validate_subtensor_shape);
+
+    // Copy pointer to buffer
+    _buffer = allocator._buffer;
+
+    // Init tensor info with new dimensions
+    size_t total_size = parent_info.offset_element_in_bytes(coords) + sub_info.total_size() - sub_info.offset_first_element_in_bytes();
+    sub_info.init(sub_info.tensor_shape(), sub_info.format(), parent_info.strides_in_bytes(), parent_info.offset_element_in_bytes(coords), total_size);
+
+    // Set TensorInfo
+    init(sub_info);
+}
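To make the offset arithmetic in TensorAllocator::init() above concrete, here is a worked example written as comments. The 8x8 U8 parent and the (2, 2) anchor are assumed values, not part of the patch.

    // Parent: 8x8 U8 tensor with no padding -> strides are 1 byte in x and 8 bytes in y.
    // Sub-tensor: 4x4 region anchored at coordinates (2, 2).
    //
    //   parent_info.offset_element_in_bytes({2, 2}) = 2 * 1 + 2 * 8 = 18 bytes
    //
    // The sub-tensor reuses the parent's strides and buffer pointer; only the anchor
    // offset (18 bytes here) and the reported total size change, so no data is copied.
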
+uint8_t *TensorAllocator::data() const
+{
+    return (_buffer != nullptr) ? _buffer.get()->data() : nullptr;
+}
+
+void TensorAllocator::allocate()
+{
+    ARM_COMPUTE_ERROR_ON(_buffer != nullptr);
+
+    _buffer = std::make_shared<std::vector<uint8_t>>(info().total_size());
+    info().set_is_resizable(false);
+}
+
+void TensorAllocator::free()
+{
+    ARM_COMPUTE_ERROR_ON(_buffer == nullptr);
+
+    _buffer.reset();
+    info().set_is_resizable(true);
+}
+
+uint8_t *TensorAllocator::lock()
+{
+    return (_buffer != nullptr) ? _buffer.get()->data() : nullptr;
+}
+
+void TensorAllocator::unlock()
+{
+}
diff --git a/src/runtime/Utils.cpp b/src/runtime/Utils.cpp
new file mode 100644
index 0000000000..1b06117c7b
--- /dev/null
+++ b/src/runtime/Utils.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/Utils.h"
+
+#include <map>
+#include <string>
+
+using namespace arm_compute;
+
+const std::string &arm_compute::string_from_scheduler_type(Scheduler::Type t)
+{
+    static std::map<Scheduler::Type, const std::string> scheduler_type_map =
+    {
+        { Scheduler::Type::ST, "Single Thread" },
+        { Scheduler::Type::CPP, "C++11 Threads" },
+        { Scheduler::Type::OMP, "OpenMP Threads" },
+        { Scheduler::Type::CUSTOM, "Custom" }
+    };
+
+    return scheduler_type_map[t];
+}
-- 
cgit v1.2.1
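Finally, a minimal sketch of the allocator lifecycle and the new string_from_scheduler_type() helper, shown outside the patch itself. The tensor shape and data type are illustrative assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/Scheduler.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/Utils.h"

    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        Tensor t;
        t.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        t.allocator()->allocate(); // backs the tensor with a std::vector<uint8_t> and marks it non-resizable
        // ... configure and run functions on t ...
        t.allocator()->free();     // releases the buffer and makes the tensor resizable again

        // Print the human-readable name of the active scheduler backend.
        std::cout << string_from_scheduler_type(Scheduler::get_type()) << std::endl;
        return 0;
    }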