aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON
diff options
context:
space:
mode:
author Anthony Barbier <anthony.barbier@arm.com> 2017-09-04 18:44:23 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 13:03:09 +0100
commit 6ff3b19ee6120edf015fad8caab2991faa3070af (patch)
tree a7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /src/runtime/NEON
download ComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r--src/runtime/NEON/INESimpleFunction.cpp39
-rw-r--r--src/runtime/NEON/functions/NEAbsoluteDifference.cpp38
-rw-r--r--src/runtime/NEON/functions/NEAccumulate.cpp61
-rw-r--r--src/runtime/NEON/functions/NEActivationLayer.cpp36
-rw-r--r--src/runtime/NEON/functions/NEArithmeticAddition.cpp38
-rw-r--r--src/runtime/NEON/functions/NEArithmeticSubtraction.cpp38
-rw-r--r--src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp49
-rw-r--r--src/runtime/NEON/functions/NEBitwiseAnd.cpp38
-rw-r--r--src/runtime/NEON/functions/NEBitwiseNot.cpp38
-rw-r--r--src/runtime/NEON/functions/NEBitwiseOr.cpp38
-rw-r--r--src/runtime/NEON/functions/NEBitwiseXor.cpp38
-rw-r--r--src/runtime/NEON/functions/NEBox3x3.cpp49
-rw-r--r--src/runtime/NEON/functions/NECannyEdge.cpp169
-rw-r--r--src/runtime/NEON/functions/NEChannelCombine.cpp45
-rw-r--r--src/runtime/NEON/functions/NEChannelExtract.cpp45
-rw-r--r--src/runtime/NEON/functions/NEColorConvert.cpp59
-rw-r--r--src/runtime/NEON/functions/NEConvolution.cpp120
-rw-r--r--src/runtime/NEON/functions/NEConvolutionLayer.cpp246
-rw-r--r--src/runtime/NEON/functions/NEDepthConcatenate.cpp67
-rw-r--r--src/runtime/NEON/functions/NEDepthConvert.cpp44
-rw-r--r--src/runtime/NEON/functions/NEDerivative.cpp52
-rw-r--r--src/runtime/NEON/functions/NEDilate.cpp40
-rw-r--r--src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp75
-rw-r--r--src/runtime/NEON/functions/NEEqualizeHistogram.cpp62
-rw-r--r--src/runtime/NEON/functions/NEErode.cpp40
-rw-r--r--src/runtime/NEON/functions/NEFastCorners.cpp101
-rw-r--r--src/runtime/NEON/functions/NEFillBorder.cpp39
-rw-r--r--src/runtime/NEON/functions/NEFullyConnectedLayer.cpp344
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp156
-rw-r--r--src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp36
-rw-r--r--src/runtime/NEON/functions/NEGEMMLowp.cpp84
-rw-r--r--src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp40
-rw-r--r--src/runtime/NEON/functions/NEGaussian3x3.cpp40
-rw-r--r--src/runtime/NEON/functions/NEGaussian5x5.cpp60
-rw-r--r--src/runtime/NEON/functions/NEGaussianPyramid.cpp183
-rw-r--r--src/runtime/NEON/functions/NEHOGDescriptor.cpp99
-rw-r--r--src/runtime/NEON/functions/NEHOGDetector.cpp36
-rw-r--r--src/runtime/NEON/functions/NEHOGGradient.cpp80
-rw-r--r--src/runtime/NEON/functions/NEHOGMultiDetection.cpp231
-rw-r--r--src/runtime/NEON/functions/NEHarrisCorners.cpp212
-rw-r--r--src/runtime/NEON/functions/NEHistogram.cpp58
-rw-r--r--src/runtime/NEON/functions/NEIntegralImage.cpp40
-rw-r--r--src/runtime/NEON/functions/NELaplacianPyramid.cpp102
-rw-r--r--src/runtime/NEON/functions/NELaplacianReconstruct.cpp100
-rw-r--r--src/runtime/NEON/functions/NELocallyConnectedLayer.cpp131
-rw-r--r--src/runtime/NEON/functions/NEMagnitude.cpp48
-rw-r--r--src/runtime/NEON/functions/NEMeanStdDev.cpp47
-rw-r--r--src/runtime/NEON/functions/NEMedian3x3.cpp40
-rw-r--r--src/runtime/NEON/functions/NEMinMaxLocation.cpp50
-rw-r--r--src/runtime/NEON/functions/NENonLinearFilter.cpp42
-rw-r--r--src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp47
-rw-r--r--src/runtime/NEON/functions/NENormalizationLayer.cpp61
-rw-r--r--src/runtime/NEON/functions/NEOpticalFlow.cpp119
-rw-r--r--src/runtime/NEON/functions/NEPhase.cpp38
-rw-r--r--src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp38
-rw-r--r--src/runtime/NEON/functions/NEPoolingLayer.cpp41
-rw-r--r--src/runtime/NEON/functions/NERemap.cpp53
-rw-r--r--src/runtime/NEON/functions/NEScale.cpp171
-rw-r--r--src/runtime/NEON/functions/NEScharr3x3.cpp40
-rw-r--r--src/runtime/NEON/functions/NESobel3x3.cpp40
-rw-r--r--src/runtime/NEON/functions/NESobel5x5.cpp81
-rw-r--r--src/runtime/NEON/functions/NESobel7x7.cpp81
-rw-r--r--src/runtime/NEON/functions/NESoftmaxLayer.cpp72
-rw-r--r--src/runtime/NEON/functions/NETableLookup.cpp38
-rw-r--r--src/runtime/NEON/functions/NEThreshold.cpp38
-rw-r--r--src/runtime/NEON/functions/NETranspose.cpp38
-rw-r--r--src/runtime/NEON/functions/NEWarpAffine.cpp62
-rw-r--r--src/runtime/NEON/functions/NEWarpPerspective.cpp62
68 files changed, 5083 insertions, 0 deletions
diff --git a/src/runtime/NEON/INESimpleFunction.cpp b/src/runtime/NEON/INESimpleFunction.cpp
new file mode 100644
index 0000000000..6f0da85fc8
--- /dev/null
+++ b/src/runtime/NEON/INESimpleFunction.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor: value-initialises the _kernel and _border_handler members. */
+INESimpleFunction::INESimpleFunction()
+    : _kernel(),
+      _border_handler()
+{
+}
+
+/** Execute the function.
+ *
+ * The border handler is run first, over its own window; the stored kernel is
+ * then handed to the NEON scheduler together with Window::DimY.
+ */
+void INESimpleFunction::run()
+{
+    // Border handling runs directly (not through the scheduler) before the main kernel.
+    const auto &border_window = _border_handler.window();
+    _border_handler.run(border_window);
+
+    // NOTE(review): Window::DimY is presumably the dimension the scheduler splits work along - confirm against IScheduler.
+    auto &&scheduler = NEScheduler::get();
+    scheduler.schedule(_kernel.get(), Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEAbsoluteDifference.cpp b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp
new file mode 100644
index 0000000000..b39feb3a2b
--- /dev/null
+++ b/src/runtime/NEON/functions/NEAbsoluteDifference.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEAbsoluteDifferenceKernel, configure it and store it as this function's kernel.
+ *
+ * @param input1 Forwarded unchanged to the kernel's configure().
+ * @param input2 Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEAbsoluteDifference::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
+{
+    auto abs_diff_kernel = arm_compute::cpp14::make_unique<NEAbsoluteDifferenceKernel>();
+    abs_diff_kernel->configure(input1, input2, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(abs_diff_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEAccumulate.cpp b/src/runtime/NEON/functions/NEAccumulate.cpp
new file mode 100644
index 0000000000..c39abfc540
--- /dev/null
+++ b/src/runtime/NEON/functions/NEAccumulate.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEAccumulateKernel, configure it and store it as this function's kernel.
+ *
+ * @param input  Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEAccumulate::configure(const ITensor *input, ITensor *output)
+{
+    auto accumulate_kernel = arm_compute::cpp14::make_unique<NEAccumulateKernel>();
+    accumulate_kernel->configure(input, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(accumulate_kernel);
+}
+
+/** Create and configure the weighted-accumulate kernel and store it as this function's kernel.
+ *
+ * @param input    Forwarded unchanged to the kernel's configure().
+ * @param alpha    Forwarded unchanged to the kernel's configure().
+ * @param output   Forwarded unchanged to the kernel's configure().
+ * @param use_fp16 When true an NEAccumulateWeightedFP16Kernel is created,
+ *                 otherwise an NEAccumulateWeightedKernel.
+ */
+void NEAccumulateWeighted::configure(const ITensor *input, float alpha, ITensor *output, bool use_fp16)
+{
+    if(!use_fp16)
+    {
+        // Default path: regular weighted-accumulate kernel.
+        auto weighted_kernel = arm_compute::cpp14::make_unique<NEAccumulateWeightedKernel>();
+        weighted_kernel->configure(input, alpha, output);
+        _kernel = std::move(weighted_kernel);
+        return;
+    }
+
+    // FP16 variant explicitly requested by the caller.
+    auto fp16_kernel = arm_compute::cpp14::make_unique<NEAccumulateWeightedFP16Kernel>();
+    fp16_kernel->configure(input, alpha, output);
+    _kernel = std::move(fp16_kernel);
+}
+
+/** Create an NEAccumulateSquaredKernel, configure it and store it as this function's kernel.
+ *
+ * @param input  Forwarded unchanged to the kernel's configure().
+ * @param shift  Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEAccumulateSquared::configure(const ITensor *input, uint32_t shift, ITensor *output)
+{
+    auto squared_kernel = arm_compute::cpp14::make_unique<NEAccumulateSquaredKernel>();
+    squared_kernel->configure(input, shift, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(squared_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEActivationLayer.cpp b/src/runtime/NEON/functions/NEActivationLayer.cpp
new file mode 100644
index 0000000000..f5d81d7cd8
--- /dev/null
+++ b/src/runtime/NEON/functions/NEActivationLayer.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
+
+using namespace arm_compute;
+
+/** Create an NEActivationLayerKernel, configure it and store it as this function's kernel.
+ *
+ * @param input           Forwarded unchanged to the kernel's configure().
+ * @param output          Forwarded unchanged to the kernel's configure().
+ * @param activation_info Forwarded unchanged to the kernel's configure().
+ */
+void NEActivationLayer::configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
+{
+    auto activation_kernel = arm_compute::cpp14::make_unique<NEActivationLayerKernel>();
+    activation_kernel->configure(input, output, activation_info);
+
+    // NOTE(review): std::move is used here, but this file does not include <utility> directly,
+    // so it presumably arrives through one of the project headers - confirm.
+    _kernel = std::move(activation_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEArithmeticAddition.cpp b/src/runtime/NEON/functions/NEArithmeticAddition.cpp
new file mode 100644
index 0000000000..50cc38b489
--- /dev/null
+++ b/src/runtime/NEON/functions/NEArithmeticAddition.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEArithmeticAdditionKernel, configure it and store it as this function's kernel.
+ *
+ * @param input1 Forwarded unchanged to the kernel's configure().
+ * @param input2 Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ * @param policy Forwarded unchanged to the kernel's configure().
+ */
+void NEArithmeticAddition::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
+{
+    auto addition_kernel = arm_compute::cpp14::make_unique<NEArithmeticAdditionKernel>();
+    addition_kernel->configure(input1, input2, output, policy);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(addition_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
new file mode 100644
index 0000000000..a3d27c0ed6
--- /dev/null
+++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEArithmeticSubtractionKernel, configure it and store it as this function's kernel.
+ *
+ * @param input1 Forwarded unchanged to the kernel's configure().
+ * @param input2 Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ * @param policy Forwarded unchanged to the kernel's configure().
+ */
+void NEArithmeticSubtraction::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
+{
+    auto subtraction_kernel = arm_compute::cpp14::make_unique<NEArithmeticSubtractionKernel>();
+    subtraction_kernel->configure(input1, input2, output, policy);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(subtraction_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
new file mode 100644
index 0000000000..a24429c6de
--- /dev/null
+++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor: value-initialises the _norm_kernel member. */
+NEBatchNormalizationLayer::NEBatchNormalizationLayer()
+    : _norm_kernel()
+{
+}
+
+/** Configure the batch-normalization kernel owned by this function.
+ *
+ * All seven arguments are forwarded, in the same order, to _norm_kernel.configure().
+ *
+ * @param input   Forwarded unchanged to the kernel.
+ * @param output  Forwarded unchanged to the kernel.
+ * @param mean    Forwarded unchanged to the kernel.
+ * @param var     Forwarded unchanged to the kernel.
+ * @param beta    Forwarded unchanged to the kernel.
+ * @param gamma   Forwarded unchanged to the kernel.
+ * @param epsilon Forwarded unchanged to the kernel.
+ */
+void NEBatchNormalizationLayer::configure(const ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon)
+{
+    // This function performs no work of its own: it only sets up the member kernel.
+    _norm_kernel.configure(input,
+                           output,
+                           mean,
+                           var,
+                           beta,
+                           gamma,
+                           epsilon);
+}
+
+/** Execute the function by handing the normalization kernel to the NEON scheduler with Window::DimY. */
+void NEBatchNormalizationLayer::run()
+{
+    // NOTE(review): Window::DimY is presumably the dimension the scheduler splits work along - confirm against IScheduler.
+    auto &&scheduler = NEScheduler::get();
+    scheduler.schedule(&_norm_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEBitwiseAnd.cpp b/src/runtime/NEON/functions/NEBitwiseAnd.cpp
new file mode 100644
index 0000000000..5aafc51dc0
--- /dev/null
+++ b/src/runtime/NEON/functions/NEBitwiseAnd.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEBitwiseAndKernel, configure it and store it as this function's kernel.
+ *
+ * @param input1 Forwarded unchanged to the kernel's configure().
+ * @param input2 Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEBitwiseAnd::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
+{
+    auto and_kernel = arm_compute::cpp14::make_unique<NEBitwiseAndKernel>();
+    and_kernel->configure(input1, input2, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(and_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEBitwiseNot.cpp b/src/runtime/NEON/functions/NEBitwiseNot.cpp
new file mode 100644
index 0000000000..af3df6e46a
--- /dev/null
+++ b/src/runtime/NEON/functions/NEBitwiseNot.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEBitwiseNotKernel, configure it and store it as this function's kernel.
+ *
+ * @param input  Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEBitwiseNot::configure(const ITensor *input, ITensor *output)
+{
+    auto not_kernel = arm_compute::cpp14::make_unique<NEBitwiseNotKernel>();
+    not_kernel->configure(input, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(not_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEBitwiseOr.cpp b/src/runtime/NEON/functions/NEBitwiseOr.cpp
new file mode 100644
index 0000000000..d12c5e5f6f
--- /dev/null
+++ b/src/runtime/NEON/functions/NEBitwiseOr.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEBitwiseOrKernel, configure it and store it as this function's kernel.
+ *
+ * @param input1 Forwarded unchanged to the kernel's configure().
+ * @param input2 Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEBitwiseOr::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
+{
+    auto or_kernel = arm_compute::cpp14::make_unique<NEBitwiseOrKernel>();
+    or_kernel->configure(input1, input2, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(or_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEBitwiseXor.cpp b/src/runtime/NEON/functions/NEBitwiseXor.cpp
new file mode 100644
index 0000000000..65c943e64c
--- /dev/null
+++ b/src/runtime/NEON/functions/NEBitwiseXor.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create an NEBitwiseXorKernel, configure it and store it as this function's kernel.
+ *
+ * @param input1 Forwarded unchanged to the kernel's configure().
+ * @param input2 Forwarded unchanged to the kernel's configure().
+ * @param output Forwarded unchanged to the kernel's configure().
+ */
+void NEBitwiseXor::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
+{
+    auto xor_kernel = arm_compute::cpp14::make_unique<NEBitwiseXorKernel>();
+    xor_kernel->configure(input1, input2, output);
+
+    // Ownership of the configured kernel moves into the _kernel member.
+    _kernel = std::move(xor_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEBox3x3.cpp b/src/runtime/NEON/functions/NEBox3x3.cpp
new file mode 100644
index 0000000000..7f0b45d34c
--- /dev/null
+++ b/src/runtime/NEON/functions/NEBox3x3.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEBox3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create and configure the 3x3 box kernel, then configure the border handler to match it.
+ *
+ * @param input                 Forwarded to the kernel and to the border handler.
+ * @param output                Forwarded to the kernel.
+ * @param border_mode           Forwarded to the border handler; the kernel is only told
+ *                              whether it equals BorderMode::UNDEFINED.
+ * @param constant_border_value Wrapped in a PixelValue and forwarded to the border handler.
+ * @param use_fp16              When true an NEBox3x3FP16Kernel is created, otherwise an NEBox3x3Kernel.
+ */
+void NEBox3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
+{
+    // Both kernel variants receive the same "is the border undefined?" flag.
+    const bool border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    if(!use_fp16)
+    {
+        auto box_kernel = arm_compute::cpp14::make_unique<NEBox3x3Kernel>();
+        box_kernel->configure(input, output, border_undefined);
+        _kernel = std::move(box_kernel);
+    }
+    else
+    {
+        auto box_fp16_kernel = arm_compute::cpp14::make_unique<NEBox3x3FP16Kernel>();
+        box_fp16_kernel->configure(input, output, border_undefined);
+        _kernel = std::move(box_fp16_kernel);
+    }
+
+    // The border handler takes its border size from whichever kernel was just stored.
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
new file mode 100644
index 0000000000..26f31f557b
--- /dev/null
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NECannyEdge.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/NEON/functions/NESobel3x3.h"
+#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
+#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <cstring>
+#include <utility>
+
+using namespace arm_compute;
+
+NECannyEdge::NECannyEdge()
+ : _sobel(), _gradient(), _non_max_suppr(), _edge_trace(), _border_mag_gradient(), _border_edge_trace(), _gx(), _gy(), _magnitude(), _phase(), _nonmax(), _output(nullptr) // Everything is default-constructed; _output stays null until configure() stores the destination tensor.
+{
+}
+
+void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value,
+ bool use_fp16)
+{ // Configures the Sobel, gradient, non-maxima suppression and edge tracing stages, then allocates the intermediate tensors.
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON(gradient_size < 3); // Together with the next check: gradient_size must lie in [3, 7].
+ ARM_COMPUTE_ERROR_ON(gradient_size > 7);
+ ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
+ ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type)); // Only norm types 1 and 2 are accepted.
+
+ _output = output; // Stored so run() can zero the output buffer before edge tracing.
+
+ const TensorShape &shape = input->info()->tensor_shape(); // All intermediate images share the input shape.
+ TensorInfo gradient_info;
+ TensorInfo magnitude_info;
+
+ // Initialize images
+ if(gradient_size < 7) // Sizes below 7 use 16-bit gradient/magnitude images; otherwise 32-bit ones are used.
+ {
+ gradient_info.init(shape, Format::S16);
+ magnitude_info.init(shape, Format::U16);
+ }
+ else
+ {
+ gradient_info.init(shape, Format::S32);
+ magnitude_info.init(shape, Format::U32);
+ }
+
+ _gx.allocator()->init(gradient_info); // _gx and _gy share the same gradient format.
+ _gy.allocator()->init(gradient_info);
+ _magnitude.allocator()->init(magnitude_info);
+
+ TensorInfo info(shape, Format::U8); // Phase and non-maxima images are always U8.
+ _phase.allocator()->init(info);
+ _nonmax.allocator()->init(info);
+
+ // Configure/Init sobelNxN
+ if(gradient_size == 3)
+ {
+ auto k = arm_compute::cpp14::make_unique<NESobel3x3>();
+ k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+ _sobel = std::move(k);
+ }
+ else if(gradient_size == 5)
+ {
+ auto k = arm_compute::cpp14::make_unique<NESobel5x5>();
+ k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+ _sobel = std::move(k);
+ }
+ else if(gradient_size == 7)
+ {
+ auto k = arm_compute::cpp14::make_unique<NESobel7x7>();
+ k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+ _sobel = std::move(k);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Gradient size not supported\n"); // Reached by sizes other than 3, 5 and 7, e.g. 4 or 6, which pass the range checks above.
+ }
+
+ // Configure gradient
+ if(use_fp16) // Caller explicitly asked for the FP16 gradient kernel variant.
+ {
+ auto k = arm_compute::cpp14::make_unique<NEGradientFP16Kernel>();
+ k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type);
+ _gradient = std::move(k);
+ }
+ else
+ {
+ auto k = arm_compute::cpp14::make_unique<NEGradientKernel>();
+ k->configure(&_gx, &_gy, &_magnitude, &_phase, norm_type);
+ _gradient = std::move(k);
+ }
+
+ // Configure non-maxima suppression
+ _non_max_suppr.configure(&_magnitude, &_phase, &_nonmax, upper_thr, lower_thr, border_mode == BorderMode::UNDEFINED); // Last argument is true only when border_mode is UNDEFINED.
+
+ // Fill border around magnitude image as non-maxima suppression will access
+ // it. If border mode is undefined filling the border is a nop.
+ _border_mag_gradient.configure(&_magnitude, _non_max_suppr.border_size(), border_mode, constant_border_value);
+
+ // Configure edge tracing
+ _edge_trace.configure(&_nonmax, output);
+
+ // Fill border with "No edge" to stop recursion in edge trace
+ _border_edge_trace.configure(&_nonmax, _edge_trace.border_size(), BorderMode::CONSTANT, 0); // Always CONSTANT 0 here, independent of the caller's border_mode.
+
+ // Allocate intermediate tensors
+ _gx.allocator()->allocate(); // Allocation happens last, after every stage has been configured against these tensors.
+ _gy.allocator()->allocate();
+ _phase.allocator()->allocate();
+ _magnitude.allocator()->allocate();
+ _nonmax.allocator()->allocate();
+}
+
+void NECannyEdge::run()
+{ // Runs the configured stages in order: Sobel, gradient, magnitude border fill, non-maxima suppression, edge tracing.
+ ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
+ ARM_COMPUTE_ERROR_ON(_output == nullptr);
+
+ // Run sobelNxN
+ _sobel->run();
+
+ // Run gradient first: this is the stage that writes the magnitude image
+ NEScheduler::get().schedule(_gradient.get(), Window::DimY);
+
+ // Fill border of the freshly written magnitude before non-maxima suppression. Nop for border mode undefined.
+ _border_mag_gradient.run(_border_mag_gradient.window());
+
+ // Run non-maxima suppression
+ NEScheduler::get().schedule(&_non_max_suppr, Window::DimY);
+
+ ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
+ memset(_output->buffer(), 0, _output->info()->total_size()); // Start from an all-zero output before edge tracing writes into it.
+
+ // Fill border before edge trace
+ _border_edge_trace.run(_border_edge_trace.window());
+
+ // Run edge tracing
+ _edge_trace.run(_edge_trace.window());
+}
diff --git a/src/runtime/NEON/functions/NEChannelCombine.cpp b/src/runtime/NEON/functions/NEChannelCombine.cpp
new file mode 100644
index 0000000000..84d4fff4ff
--- /dev/null
+++ b/src/runtime/NEON/functions/NEChannelCombine.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEChannelCombine.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEChannelCombine::configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output)
+{ // ITensor overload: four input planes combined into a single output tensor.
+ auto k = arm_compute::cpp14::make_unique<NEChannelCombineKernel>();
+ k->configure(plane0, plane1, plane2, plane3, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
+
+void NEChannelCombine::configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output)
+{ // IImage overload: three input planes combined into an IMultiImage output.
+ auto k = arm_compute::cpp14::make_unique<NEChannelCombineKernel>();
+ k->configure(plane0, plane1, plane2, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
diff --git a/src/runtime/NEON/functions/NEChannelExtract.cpp b/src/runtime/NEON/functions/NEChannelExtract.cpp
new file mode 100644
index 0000000000..634e918eac
--- /dev/null
+++ b/src/runtime/NEON/functions/NEChannelExtract.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEChannelExtract.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEChannelExtract::configure(const ITensor *input, Channel channel, ITensor *output)
+{ // ITensor overload: extracts the requested channel of input into output.
+ auto k = arm_compute::cpp14::make_unique<NEChannelExtractKernel>();
+ k->configure(input, channel, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
+
+void NEChannelExtract::configure(const IMultiImage *input, Channel channel, IImage *output)
+{ // IMultiImage overload: extracts the requested channel of a multi-image into an IImage.
+ auto k = arm_compute::cpp14::make_unique<NEChannelExtractKernel>();
+ k->configure(input, channel, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
diff --git a/src/runtime/NEON/functions/NEColorConvert.cpp b/src/runtime/NEON/functions/NEColorConvert.cpp
new file mode 100644
index 0000000000..bbaa832284
--- /dev/null
+++ b/src/runtime/NEON/functions/NEColorConvert.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEColorConvert.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEColorConvert::configure(const ITensor *input, ITensor *output)
+{ // Overload for an ITensor input and an ITensor output.
+ auto k = arm_compute::cpp14::make_unique<NEColorConvertKernel>();
+ k->configure(input, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
+
+void NEColorConvert::configure(const IMultiImage *input, IImage *output)
+{ // Overload for an IMultiImage input and an IImage output.
+ auto k = arm_compute::cpp14::make_unique<NEColorConvertKernel>();
+ k->configure(input, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
+
+void NEColorConvert::configure(const IImage *input, IMultiImage *output)
+{ // Overload for an IImage input and an IMultiImage output.
+ auto k = arm_compute::cpp14::make_unique<NEColorConvertKernel>();
+ k->configure(input, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
+
+void NEColorConvert::configure(const IMultiImage *input, IMultiImage *output)
+{ // Overload for an IMultiImage input and an IMultiImage output.
+ auto k = arm_compute::cpp14::make_unique<NEColorConvertKernel>();
+ k->configure(input, output); // Arguments are forwarded unchanged; no validation is done at this level.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+}
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
new file mode 100644
index 0000000000..3f39ae2cbd
--- /dev/null
+++ b/src/runtime/NEON/functions/NEConvolution.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <array>
+#include <utility>
+
+using namespace arm_compute;
+
+void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+{ // Configures the 3x3 convolution kernel and the border handler for input.
+ auto k = arm_compute::cpp14::make_unique<NEConvolution3x3Kernel>();
+ k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); // Last argument is true only when border_mode is UNDEFINED.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+ _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); // Border fill is sized from the kernel configured above.
+}
+
+template <unsigned int matrix_size>
+NEConvolutionSquare<matrix_size>::NEConvolutionSquare()
+ : _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() // _is_separable starts false; configure() sets it from separate_matrix().
+{
+}
+
+template <unsigned int matrix_size>
+void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+{ // Configures either a two-pass (horizontal then vertical) convolution or a single full-matrix kernel, depending on separability.
+ ARM_COMPUTE_ERROR_ON(conv == nullptr);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
+
+ std::array<int16_t, matrix_size> conv_col{ { 0 } }; // Receives the column vector from separate_matrix().
+ std::array<int16_t, matrix_size> conv_row{ { 0 } }; // Receives the row vector from separate_matrix().
+
+ _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size); // Result is stored because run() branches on it as well.
+
+ if(_is_separable)
+ {
+ DataType intermediate_type = DataType::UNKNOWN;
+ std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size); // Only the second returned element is used, as the data type of _tmp.
+
+ _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type)); // Intermediate image has the input shape.
+
+ if(scale == 0) // A scale of 0 is replaced by one derived from the matrix. NOTE(review): the non-separable branch forwards scale unchanged - confirm the kernel handles 0 itself.
+ {
+ scale = calculate_matrix_scale(conv, matrix_size);
+ }
+
+ _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); // Pass 1: input -> _tmp using the row vector.
+ _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); // Pass 2: _tmp -> output using the column vector and the scale.
+
+ _tmp.allocator()->allocate(); // Allocated only after both kernels have been configured against it.
+
+ _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); // Sized from the horizontal kernel, which is the one reading input.
+ }
+ else
+ {
+ _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); // Single pass with the full matrix.
+ _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+ }
+}
+
+template <unsigned int matrix_size>
+void NEConvolutionSquare<matrix_size>::run()
+{ // Fills the input border, then runs the kernel path chosen in configure().
+ _border_handler.run(_border_handler.window()); // Border fill runs directly over its own window rather than through the scheduler.
+
+ if(_is_separable)
+ {
+ NEScheduler::get().schedule(&_kernel_hor, Window::DimY); // Horizontal pass must precede the vertical one: it writes the _tmp tensor the vertical pass reads.
+ NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+ }
+ else
+ {
+ NEScheduler::get().schedule(&_kernel, Window::DimY);
+ }
+}
+
+template class arm_compute::NEConvolutionSquare<5>; // Explicit instantiations: the member templates are defined in this source file, so sizes 5, 7 and 9 are instantiated here.
+template class arm_compute::NEConvolutionSquare<7>;
+template class arm_compute::NEConvolutionSquare<9>;
+
+void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+{ // Configures the rectangular (rows x cols) convolution kernel and the border handler for input.
+ auto k = arm_compute::cpp14::make_unique<NEConvolutionRectangleKernel>();
+ k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED); // Last argument is true only when border_mode is UNDEFINED.
+ _kernel = std::move(k); // Ownership of the configured kernel moves into the _kernel member.
+ _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); // Border fill is sized from the kernel configured above.
+}
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
new file mode 100644
index 0000000000..bd688cffb6
--- /dev/null
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include <cmath>
+#include <tuple>
+
+using namespace arm_compute;
+
+NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights()
+ : _weights_reshape_kernel(), _weights_transposed_kernel(), _weights_reshaped(), _transpose1xW(false) // _transpose1xW starts false; configure() overwrites it with the caller's choice.
+{
+}
+
+void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW)
+{ // Configures the weights reshape kernel and, when transpose1xW is set, a second transpose kernel fed through an intermediate tensor.
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
+ ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
+
+ if(biases != nullptr) // biases is optional; it is validated only when supplied.
+ {
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases);
+ ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); // Bias length must equal dimension 3 of the weights.
+ ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
+ }
+
+ // When biases are present they are embedded into the reshaped weights matrix as one extra row
+ const bool has_bias = (biases != nullptr); // Plain local: no underscore prefix, since this class has no such member.
+
+ _transpose1xW = transpose1xW; // Remembered so run() knows whether to schedule the transpose kernel.
+
+ if(transpose1xW)
+ {
+ // Create tensor to store the reshaped weights
+ const unsigned int mat_weights_cols = weights->info()->dimension(3);
+ const unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + (has_bias ? 1 : 0);
+ TensorShape shape_wr(mat_weights_cols, mat_weights_rows);
+ TensorInfo info_wr(shape_wr, 1, weights->info()->data_type(), weights->info()->fixed_point_position());
+
+ _weights_reshaped.allocator()->init(info_wr);
+ _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); // Stage 1: weights (+ biases) -> intermediate matrix.
+ _weights_transposed_kernel.configure(&_weights_reshaped, output); // Stage 2: intermediate matrix -> output.
+ _weights_reshaped.allocator()->allocate(); // Allocated only after both kernels have been configured against it.
+ }
+ else
+ {
+ _weights_reshape_kernel.configure(weights, biases, output); // No transpose requested: reshape straight into output.
+ }
+}
+
+void NEConvolutionLayerReshapeWeights::run()
+{ // Runs the reshape kernel and, if configured, the transpose kernel after it.
+ NEScheduler::get().schedule(&_weights_reshape_kernel, 3); // Split on dimension index 3. NOTE(review): presumably the kernel-count dimension of the weights - confirm.
+ if(_transpose1xW)
+ {
+ NEScheduler::get().schedule(&_weights_transposed_kernel, Window::DimY);
+ }
+}
+
+NEConvolutionLayer::NEConvolutionLayer()
+ : _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(),
+ _gemm_output(), _has_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false) // All three flags start false and are set by configure().
+{
+}
+
+void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
+{ // Configures the convolution as im2col -> (optional interleave) -> matrix multiply -> col2im, and allocates the intermediate tensors.
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output);
+ ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); // Dimension 2 of weights and input must match unless the weights are pre-reshaped.
+ ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
+
+ if(biases != nullptr) // biases is optional; it is validated only when supplied.
+ {
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
+ ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3));
+ ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
+ }
+
+ const DataType dt = input->info()->data_type(); // Every intermediate tensor reuses the input data type and fixed point position.
+ const int fixed_point_position = input->info()->fixed_point_position();
+
+ _has_bias = (biases != nullptr);
+ _are_weights_reshaped = weights_info.are_reshaped(); // When true, no internal weights reshape is configured or run.
+
+ // Get parameters from conv_info
+ unsigned int stride_x = 0;
+ unsigned int stride_y = 0;
+ unsigned int pad_x = 0;
+ unsigned int pad_y = 0;
+ std::tie(stride_x, stride_y) = conv_info.stride();
+ std::tie(pad_x, pad_y) = conv_info.pad();
+
+ // Get convolved dimensions
+ unsigned int conv_w = 0;
+ unsigned int conv_h = 0;
+
+ const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size() : weights->info()->dimension(0); // Pre-reshaped weights no longer expose the kernel width, so it comes from weights_info.
+ std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width,
+ stride_x, stride_y, pad_x, pad_y, conv_info.round());
+ ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
+
+ // Check if its a "fully connected" convolution
+ _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); // True when the convolved output is 1x1.
+
+ unsigned int mat_weights_cols = weights->info()->dimension(3);
+ unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + (_has_bias ? 1 : 0); // One extra row when biases are present.
+
+ // Reshape weights if needed
+ if(_are_weights_reshaped)
+ {
+ mat_weights_cols = output->info()->dimension(2); // Column count is taken from the output, not from the already-reshaped weights.
+ const unsigned int quarter_reshaped_cols = weights->info()->dimension(0) / 4; // NOTE(review): assumes the pre-reshaped layout packs 4 rows along dimension 0 - confirm.
+ mat_weights_rows = (_has_bias ? 1 + quarter_reshaped_cols : quarter_reshaped_cols);
+ }
+ else
+ {
+ if(_is_fully_connected_convolution)
+ {
+ // Create tensor to store the reshaped weights
+ TensorShape shape_wr(mat_weights_cols, mat_weights_rows);
+ TensorInfo info_wr(shape_wr, 1, dt, fixed_point_position);
+ _weights_reshaped.allocator()->init(info_wr);
+ _reshape_weights.configure(weights, biases, &_weights_reshaped, false /* 1xW transpose */);
+ }
+ else
+ {
+ // Create tensor to store transposed weights
+ const float transpose_width = 16.0f / input->info()->element_size(); // Number of elements that fit in 16 bytes.
+ TensorShape shape_wt(mat_weights_rows * static_cast<unsigned int>(transpose_width), static_cast<unsigned int>(std::ceil(mat_weights_cols / transpose_width)));
+ TensorInfo info_wt(shape_wt, 1, dt, fixed_point_position);
+ _weights_reshaped.allocator()->init(info_wt);
+ _reshape_weights.configure(weights, biases, &_weights_reshaped, true /* 1xW transpose */);
+ }
+ weights = &_weights_reshaped; // From here on the kernels consume the internally reshaped weights.
+ }
+
+ // Create tensor to store im2col reshaped inputs
+ const unsigned int mat_input_cols = mat_weights_rows; // Input matrix width equals weights matrix height.
+ const unsigned int mat_input_rows = conv_w * conv_h; // One row per output position.
+ TensorShape shape_im2col = input->info()->tensor_shape(); // Starts from the input shape, so dimensions above 2 are carried over.
+ shape_im2col.set(0, mat_input_cols);
+ shape_im2col.set(1, mat_input_rows);
+ shape_im2col.set(2, 1);
+ _input_im2col_reshaped.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position));
+
+ // Create tensor (interleave) to prepare input tensor for GEMM
+ if(!_is_fully_connected_convolution)
+ {
+ TensorShape shape_interleaved = shape_im2col;
+ shape_interleaved.set(0, shape_interleaved.x() * 4); // Interleaved layout: 4 times wider...
+ shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f)); // ...and a quarter of the rows, rounded up.
+ _input_interleaved_reshaped.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position));
+ }
+
+ // Create GEMM output tensor
+ TensorShape shape_gemm = _input_im2col_reshaped.info()->tensor_shape();
+ shape_gemm.set(0, mat_weights_cols);
+ shape_gemm.set(1, mat_input_rows);
+ _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position));
+
+ // Configure kernels
+ _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias);
+ if(_is_fully_connected_convolution)
+ {
+ _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f); // The 1x1 case multiplies the im2col output directly, with no interleave step.
+ }
+ else
+ {
+ _input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped);
+ _mm_kernel.configure(&_input_interleaved_reshaped, weights, &_gemm_output, 1.0f);
+ }
+ _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
+
+ // Allocate intermediate tensor
+ if(!_are_weights_reshaped) // _weights_reshaped is initialised only when the reshape is done internally.
+ {
+ _weights_reshaped.allocator()->allocate();
+ }
+ _input_im2col_reshaped.allocator()->allocate();
+ if(!_is_fully_connected_convolution) // The interleaved tensor exists only on the non-1x1 path.
+ {
+ _input_interleaved_reshaped.allocator()->allocate();
+ }
+ _gemm_output.allocator()->allocate();
+}
+
+void NEConvolutionLayer::run()
+{ // Executes the configured stages: one-off weights reshape, im2col, optional interleave, matrix multiply, col2im.
+ // Run weights reshaping (Runs once for every configure)
+ if(!_are_weights_reshaped)
+ {
+ _are_weights_reshaped = true; // Flag is flipped so later run() calls skip the reshape until configure() resets it.
+ _reshape_weights.run();
+ }
+
+ // Run input reshaping
+ NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY);
+ if(!_is_fully_connected_convolution) // The interleave kernel is configured only for the non-1x1 case.
+ {
+ // Run interleave
+ NEScheduler::get().schedule(&_input_interleave_kernel, Window::DimY);
+ }
+
+ // Runs matrix multiply on reshaped matrices
+ NEScheduler::get().schedule(&_mm_kernel, Window::DimY);
+
+ // Reshape output matrix
+ NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
new file mode 100644
index 0000000000..7d2c5494a9
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+NEDepthConcatenate::NEDepthConcatenate()
+ : _inputs_vector(), _concat_kernels_vector(), _border_handlers_vector(), _num_inputs(0) // No kernels exist until configure() allocates one pair per input.
+{
+}
+
+void NEDepthConcatenate::configure(std::vector<ITensor *> inputs_vector, ITensor *output)
+{ // Creates one concatenate kernel and one border handler per input, each writing into output at a running depth offset.
+ ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); // At least two inputs are required.
+
+ _num_inputs = inputs_vector.size();
+ _concat_kernels_vector = arm_compute::cpp14::make_unique<NEDepthConcatenateKernel[]>(_num_inputs); // Arrays of _num_inputs kernels, indexed in step with inputs_vector.
+ _border_handlers_vector = arm_compute::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
+
+ unsigned int depth_offset = 0; // Running sum of dimension 2 over the inputs configured so far.
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+ _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0)); // Input border is filled with constant 0, sized from the matching concat kernel.
+
+ depth_offset += inputs_vector.at(i)->info()->dimension(2); // The next input starts after this one along dimension 2.
+ }
+}
+
+void NEDepthConcatenate::run()
+{ // For each input in order: fill its border, then run its concatenate kernel.
+ for(unsigned i = 0; i < _num_inputs; ++i)
+ {
+ NEScheduler::get().schedule(&_border_handlers_vector[i], Window::DimX); // Border fill is scheduled before the concat kernel of the same input.
+ NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimX);
+ }
+}
diff --git a/src/runtime/NEON/functions/NEDepthConvert.cpp b/src/runtime/NEON/functions/NEDepthConvert.cpp
new file mode 100644
index 0000000000..a339cae316
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDepthConvert.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Validates the tensors, then creates and stores a configured depth-convert kernel.
+// Input and output must be distinct tensors with different data types.
+void NEDepthConvert::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_ERROR_ON(input == output);
+    ARM_COMPUTE_ERROR_ON(input->info()->data_type() == output->info()->data_type());
+
+    // Configure the kernel first and hand ownership to the base class afterwards
+    auto convert_kernel = arm_compute::cpp14::make_unique<NEDepthConvertKernel>();
+    convert_kernel->configure(input, output, policy, shift);
+
+    _kernel = std::move(convert_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp
new file mode 100644
index 0000000000..2887c13233
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDerivative.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Default-initialises the derivative kernel and its border handler.
+NEDerivative::NEDerivative()
+    : _kernel(),
+      _border_handler()
+{
+}
+
+// Configures the derivative kernel and a 1-pixel border handler on the input.
+// At least one of output_x / output_y must be provided.
+void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
+
+    // The kernel is only told whether the border is undefined; the actual mode goes to the border handler
+    const bool border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    _kernel.configure(input, output_x, output_y, border_undefined);
+    _border_handler.configure(input, 1, border_mode, PixelValue(constant_border_value));
+}
+
+// Fills the input border, then runs the derivative kernel.
+void NEDerivative::run()
+{
+    // The border handler is executed directly over its own full window rather than through the scheduler
+    const auto &border_window = _border_handler.window();
+    _border_handler.run(border_window);
+
+    NEScheduler::get().schedule(&_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEDilate.cpp b/src/runtime/NEON/functions/NEDilate.cpp
new file mode 100644
index 0000000000..0c016f14f9
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDilate.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDilate.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEDilateKernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Creates and stores a configured dilate kernel, then configures the border
+// handler with the border size reported by that kernel.
+void NEDilate::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    const bool border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    auto dilate_kernel = arm_compute::cpp14::make_unique<NEDilateKernel>();
+    dilate_kernel->configure(input, output, border_undefined);
+    _kernel = std::move(dilate_kernel);
+
+    // Query the border size through the stored kernel, i.e. after ownership has been transferred
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
new file mode 100644
index 0000000000..3f3e7710fb
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include <cmath>
+#include <tuple>
+
+using namespace arm_compute;
+
+// Default-initialises the kernels, the input border handler and the accumulator tensor.
+NEDirectConvolutionLayer::NEDirectConvolutionLayer()
+    : _accumulate_bias_kernel(),
+      _conv_kernel(),
+      _input_border_handler(),
+      _accumulator()
+{
+}
+
+// Configures the convolution kernel, the bias accumulation kernel and the
+// zero-filling border handler. For a QS8 output the convolution is written to
+// an intermediate QS16 accumulator tensor that the bias kernel then reads.
+void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+
+    // Release the accumulator if it already owns a buffer
+    if(nullptr != _accumulator.buffer())
+    {
+        _accumulator.allocator()->free();
+    }
+
+    const bool is_fixed_point = (DataType::QS8 == output->info()->data_type());
+
+    if(!is_fixed_point)
+    {
+        // The convolution writes to the output and the bias kernel is configured on that same tensor
+        _conv_kernel.configure(input, weights, output, conv_info);
+        _accumulate_bias_kernel.configure(output, bias);
+    }
+    else
+    {
+        // Fixed point: convolve into a QS16 accumulator with the output's shape and fixed point position
+        const TensorInfo accumulator_info(output->info()->tensor_shape(), 1, DataType::QS16, output->info()->fixed_point_position());
+        _accumulator.allocator()->init(accumulator_info);
+
+        _conv_kernel.configure(input, weights, &_accumulator, conv_info);
+        _accumulate_bias_kernel.configure(&_accumulator, bias, output);
+
+        // Allocate only after both kernels have been configured
+        _accumulator.allocator()->allocate();
+    }
+
+    // Zero-pad the input by the border size the convolution kernel asks for
+    _input_border_handler.configure(input, _conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+}
+
+// Fills the input border, runs the convolution and finally accumulates the bias.
+void NEDirectConvolutionLayer::run()
+{
+    // The border handler runs directly over its own full window
+    const auto &border_window = _input_border_handler.window();
+    _input_border_handler.run(border_window);
+
+    auto &scheduler = NEScheduler::get();
+    scheduler.schedule(&_conv_kernel, Window::DimZ);
+    scheduler.schedule(&_accumulate_bias_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
new file mode 100644
index 0000000000..f6ec677e44
--- /dev/null
+++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Default-initialises the three kernels and builds the histogram, cumulative
+// distribution and U8 look-up table from nr_bins / max_range.
+NEEqualizeHistogram::NEEqualizeHistogram()
+    : _histogram_kernel(),
+      _cd_histogram_kernel(),
+      _map_histogram_kernel(),
+      _hist(nr_bins, 0, max_range),
+      _cum_dist(nr_bins, 0, max_range),
+      _cd_lut(nr_bins, DataType::U8)
+{
+}
+
+// Validates that input and output are 2D single-channel U8 images and wires
+// the three kernels together through the internal histogram, cumulative
+// distribution and look-up table objects.
+void NEEqualizeHistogram::configure(const IImage *input, IImage *output)
+{
+    ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
+    ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+
+    // Stage 1: input -> histogram
+    _histogram_kernel.configure(input, &_hist);
+
+    // Stage 2: histogram -> cumulative distribution and look-up table
+    _cd_histogram_kernel.configure(input, &_hist, &_cum_dist, &_cd_lut);
+
+    // Stage 3: input + look-up table -> output
+    _map_histogram_kernel.configure(input, &_cd_lut, output);
+}
+
+// Runs the three equalisation stages in order.
+void NEEqualizeHistogram::run()
+{
+    auto &scheduler = NEScheduler::get();
+
+    // Build the histogram of the input
+    scheduler.schedule(&_histogram_kernel, Window::DimY);
+
+    // Derive the cumulative distribution and the look-up table; this kernel is run
+    // directly over its own full window instead of going through the scheduler
+    _cd_histogram_kernel.run(_cd_histogram_kernel.window());
+
+    // Apply the look-up table to map the input onto the output
+    scheduler.schedule(&_map_histogram_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEErode.cpp b/src/runtime/NEON/functions/NEErode.cpp
new file mode 100644
index 0000000000..9b011db845
--- /dev/null
+++ b/src/runtime/NEON/functions/NEErode.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEErode.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEErodeKernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Creates and stores a configured erode kernel, then configures the border
+// handler with the border size reported by that kernel.
+void NEErode::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    const bool border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    auto erode_kernel = arm_compute::cpp14::make_unique<NEErodeKernel>();
+    erode_kernel->configure(input, output, border_undefined);
+    _kernel = std::move(erode_kernel);
+
+    // Query the border size through the stored kernel, i.e. after ownership has been transferred
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp
new file mode 100644
index 0000000000..33a58f1904
--- /dev/null
+++ b/src/runtime/NEON/functions/NEFastCorners.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEFastCorners.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+using namespace arm_compute;
+
+// Default-initialises all kernels and intermediate tensors; non-maxima suppression starts disabled.
+NEFastCorners::NEFastCorners()
+    : _fast_corners_kernel(), _border_handler(), _nonmax_kernel(), _fill_kernel(), _output(), _suppressed(), _non_max(false)
+{
+}
+
+/** Configure the FAST corners pipeline: corner detection, optional non-maxima
+ * suppression and collection of the resulting key points.
+ *
+ * @param[in]  input                 Source image. Must be a 2D, single channel U8 tensor.
+ * @param[in]  threshold             Detection threshold forwarded to the FAST corners kernel. Must lie in the range [1, 255].
+ * @param[in]  nonmax_suppression    If true, non-maxima suppression is run on the detector output before the corners are collected.
+ * @param[out] corners               Array that receives the detected key points. Must not be nullptr.
+ * @param[in]  border_mode           Border mode. Only BorderMode::UNDEFINED is accepted.
+ * @param[in]  constant_border_value Constant value forwarded to the border handler.
+ */
+void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppression, KeyPointArray *corners,
+                              BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
+    ARM_COMPUTE_ERROR_ON(BorderMode::UNDEFINED != border_mode);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON(nullptr == corners);
+    // The two bounds must be OR-ed: no value is both below 1 and above 255, so an
+    // AND-ed check could never trigger and out-of-range thresholds went unnoticed.
+    ARM_COMPUTE_ERROR_ON(threshold < 1 || threshold > 255);
+
+    _non_max = nonmax_suppression;
+
+    TensorInfo tensor_info(input->info()->tensor_shape(), Format::U8);
+    _output.allocator()->init(tensor_info);
+
+    // If border is UNDEFINED _fast_corners_kernel will operate in xwindow (3,
+    // width - 3) and ywindow (3, height -3) so the output image will leave the
+    // pixels on the borders unchanged. This is reflected in the valid region
+    // of the output. The non maxima suppression is only run on the valid
+    // pixels.
+    _fast_corners_kernel.configure(input, &_output, threshold, nonmax_suppression, BorderMode::UNDEFINED == border_mode);
+    // Wrap the border value explicitly in a PixelValue, as the other border-handling functions do
+    _border_handler.configure(input, _fast_corners_kernel.border_size(), border_mode, PixelValue(constant_border_value));
+
+    if(!_non_max)
+    {
+        // Without suppression the key points are collected straight from the detector output
+        _fill_kernel.configure(&_output, 1 /* we keep all texels >0 */, corners);
+    }
+    else
+    {
+        // With suppression the key points are collected from the suppressed image instead
+        _suppressed.allocator()->init(tensor_info);
+        _nonmax_kernel.configure(&_output, &_suppressed, BorderMode::UNDEFINED == border_mode);
+        _fill_kernel.configure(&_suppressed, 1 /* we keep all texels >0 */, corners);
+
+        // Allocate intermediate tensors
+        _suppressed.allocator()->allocate();
+    }
+
+    // Allocate intermediate tensors (done last, once every kernel using _output has been configured)
+    _output.allocator()->allocate();
+}
+
+// Fills the input border, detects corners, optionally suppresses non-maxima
+// and finally collects the key points.
+void NEFastCorners::run()
+{
+    // The border handler runs directly over its own full window
+    _border_handler.run(_border_handler.window());
+
+    auto &scheduler = NEScheduler::get();
+
+    scheduler.schedule(&_fast_corners_kernel, Window::DimY);
+
+    // The suppression stage only exists when it was requested at configure time
+    if(_non_max)
+    {
+        scheduler.schedule(&_nonmax_kernel, Window::DimY);
+    }
+
+    scheduler.schedule(&_fill_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp
new file mode 100644
index 0000000000..e884f4a668
--- /dev/null
+++ b/src/runtime/NEON/functions/NEFillBorder.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEFillBorder.h"
+
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Forwards all arguments unchanged to the underlying border handler kernel.
+void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
+{
+    _border_handler.configure(input,
+                              border_width,
+                              border_mode,
+                              constant_border_value);
+}
+
+// Schedules the border handler kernel along Window::DimZ.
+void NEFillBorder::run()
+{
+    auto &scheduler = NEScheduler::get();
+    scheduler.schedule(&_border_handler, Window::DimZ);
+}
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
new file mode 100644
index 0000000000..abb41e9f70
--- /dev/null
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
+
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include <algorithm>
+#include <cmath>
+
+using namespace arm_compute;
+
+// Default-initialises both transpose kernels and the temporary tensor; both mode flags start as false.
+NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights()
+    : _transpose_kernel(),
+      _transpose1xW_kernel(),
+      _transpose_output(),
+      _transpose_weights(false),
+      _is_batched_fc_layer(false)
+{
+}
+
+// Configures the kernels needed to reshape a 2D weights tensor. Depending on
+// the two flags this is a plain transpose, a 1xW transpose, or a transpose
+// into a temporary tensor followed by a 1xW transpose. Having both flags
+// false is not supported.
+void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON(output == nullptr);
+    ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2);
+    ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false));
+
+    const DataType data_type = input->info()->data_type();
+    const int      fpp       = input->info()->fixed_point_position();
+
+    // Remember the mode so that run() knows which kernels to schedule
+    _transpose_weights   = transpose_weights;
+    _is_batched_fc_layer = is_batched_fc_layer;
+
+    if(_transpose_weights && _is_batched_fc_layer)
+    {
+        // Transpose into a temporary tensor whose two dimensions are swapped with respect to the input
+        const TensorShape transposed_shape(input->info()->dimension(1), input->info()->dimension(0));
+        _transpose_output.allocator()->init(TensorInfo(transposed_shape, 1, data_type, fpp));
+        _transpose_kernel.configure(input, &_transpose_output);
+
+        // The 1xW transpose then reads the temporary tensor and writes the final output
+        _transpose1xW_kernel.configure(&_transpose_output, output);
+
+        // Allocate the temporary tensor once both kernels have been configured
+        _transpose_output.allocator()->allocate();
+    }
+    else if(_transpose_weights)
+    {
+        // Plain transpose straight into the output
+        _transpose_kernel.configure(input, output);
+    }
+    else if(_is_batched_fc_layer)
+    {
+        // 1xW transpose straight from the input
+        _transpose1xW_kernel.configure(input, output);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Configuration transpose_weights=false & is_batched_fc_layer=false not supported");
+    }
+}
+
+// Runs the kernels selected by configure(): the transpose first (if enabled),
+// followed by the 1xW transpose (if enabled).
+void NEFullyConnectedLayerReshapeWeights::run()
+{
+    auto &scheduler = NEScheduler::get();
+
+    if(_transpose_weights)
+    {
+        scheduler.schedule(&_transpose_kernel, Window::DimY);
+    }
+
+    if(_is_batched_fc_layer)
+    {
+        scheduler.schedule(&_transpose1xW_kernel, Window::DimY);
+    }
+}
+
+// Default-initialises every kernel and intermediate tensor; all state flags start as false.
+NEFullyConnectedLayer::NEFullyConnectedLayer()
+    : _im2col_kernel(),
+      _reshape_weights_kernel(),
+      _interleave4x4_kernel(),
+      _mm_kernel(),
+      _accumulate_biases_kernel(),
+      _im2col_output(),
+      _interleave4x4_output(),
+      _reshape_weights_output(),
+      _are_weights_reshaped(false),
+      _is_fc_after_conv(false),
+      _is_batched_fc_layer(false),
+      _accumulate_biases(false)
+{
+}
+
+// Batched fully connected layer fed by a convolution layer:
+// im2col -> interleave 4x4 -> matrix multiply.
+void NEFullyConnectedLayer::configure_conv_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output)
+{
+    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2) * (16 / weights->info()->element_size())));
+
+    const auto    *in_info   = input->info();
+    const DataType data_type = in_info->data_type();
+    const int      fpp       = in_info->fixed_point_position();
+
+    // The input has to be linearized: its first three dimensions are collapsed into
+    // dimension 0 of the im2col output and dimensions 3, 4, 5 become dimensions 1, 2, 3
+    TensorShape linearized_shape;
+    linearized_shape.set(0, in_info->dimension(0) * in_info->dimension(1) * in_info->dimension(2));
+    linearized_shape.set(1, in_info->dimension(3));
+    linearized_shape.set(2, in_info->dimension(4));
+    linearized_shape.set(3, in_info->dimension(5));
+    _im2col_output.allocator()->init(TensorInfo(linearized_shape, 1, data_type, fpp));
+
+    // Interleaved shape: four times as wide, a quarter (rounded up) as high as the im2col output
+    TensorShape interleaved_shape = _im2col_output.info()->tensor_shape();
+    interleaved_shape.set(0, interleaved_shape.x() * 4);
+    interleaved_shape.set(1, std::ceil(static_cast<float>(interleaved_shape.y()) / 4));
+    _interleave4x4_output.allocator()->init(TensorInfo(interleaved_shape, 1, data_type, fpp));
+
+    // Chain the kernels: input -> im2col -> interleave 4x4 -> matrix multiply -> output
+    _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+    _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output);
+    _mm_kernel.configure(&_interleave4x4_output, weights, output, 1.0f);
+
+    // The intermediate tensors are allocated only after every kernel has been configured
+    _im2col_output.allocator()->allocate();
+    _interleave4x4_output.allocator()->allocate();
+}
+
+// Batched fully connected layer fed by another fully connected layer:
+// interleave 4x4 -> matrix multiply.
+void NEFullyConnectedLayer::configure_fc_fc_wb(const ITensor *input, const ITensor *weights, ITensor *output)
+{
+    const auto    *in_info   = input->info();
+    const DataType data_type = in_info->data_type();
+    const int      fpp       = in_info->fixed_point_position();
+
+    // Interleaved shape: four times as wide, a quarter (rounded up) as high as the input
+    TensorShape interleaved_shape = in_info->tensor_shape();
+    interleaved_shape.set(0, interleaved_shape.x() * 4);
+    interleaved_shape.set(1, std::ceil(static_cast<float>(interleaved_shape.y()) / 4));
+    _interleave4x4_output.allocator()->init(TensorInfo(interleaved_shape, 1, data_type, fpp));
+
+    // Chain the kernels: input -> interleave 4x4 -> matrix multiply -> output
+    _interleave4x4_kernel.configure(input, &_interleave4x4_output);
+    _mm_kernel.configure(&_interleave4x4_output, weights, output, 1.0f);
+
+    // The intermediate tensor is allocated only after every kernel has been configured
+    _interleave4x4_output.allocator()->allocate();
+}
+
+// Non-batched fully connected layer fed by a convolution layer:
+// im2col -> matrix multiply.
+void NEFullyConnectedLayer::configure_conv_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output)
+{
+    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+
+    const auto    *in_info   = input->info();
+    const DataType data_type = in_info->data_type();
+    const int      fpp       = in_info->fixed_point_position();
+
+    // The input has to be linearized: its first three dimensions are collapsed
+    // into a single row of the im2col output
+    TensorShape linearized_shape;
+    linearized_shape.set(0, in_info->dimension(0) * in_info->dimension(1) * in_info->dimension(2));
+    linearized_shape.set(1, 1);
+    _im2col_output.allocator()->init(TensorInfo(linearized_shape, 1, data_type, fpp));
+
+    // Chain the kernels: input -> im2col -> matrix multiply -> output
+    _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+    _mm_kernel.configure(&_im2col_output, weights, output, 1.0f);
+
+    // The im2col output is allocated only after every kernel has been configured
+    _im2col_output.allocator()->allocate();
+}
+
+// Non-batched fully connected layer fed by another fully connected layer:
+// the input goes straight into the matrix multiply.
+void NEFullyConnectedLayer::configure_fc_fc_nb(const ITensor *input, const ITensor *weights, ITensor *output)
+{
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
+
+    // No intermediate tensor is needed in this case
+    _mm_kernel.configure(input, weights, output, 1.0f);
+}
+
+// Configures the whole fully connected layer: optional bias accumulation,
+// optional reshaping of the weights, and one of four kernel chains selected
+// by whether the layer is batched and whether it follows a convolution layer.
+void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, bool are_weights_reshaped)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
+    ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2);
+
+    const DataType data_type = input->info()->data_type();
+    const int      fpp       = input->info()->fixed_point_position();
+
+    // Reset the state flags; some of them are refined further down
+    _are_weights_reshaped = are_weights_reshaped;
+    _is_fc_after_conv     = true;
+    _is_batched_fc_layer  = false;
+    _accumulate_biases    = false;
+
+    // Biases are optional
+    if(nullptr != biases)
+    {
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+
+        _accumulate_biases = true;
+        _accumulate_biases_kernel.configure(output, biases);
+    }
+
+    // Four different cases are handled below:
+    //  1) Convolution layer     -> Fully Connected layer without batches
+    //  2) Fully Connected layer -> Fully Connected layer without batches
+    //  3) Convolution layer     -> Fully Connected layer with batches
+    //  4) Fully Connected layer -> Fully Connected layer with batches
+
+    // The layer is batched when the output has more than one row
+    _is_batched_fc_layer = (output->info()->dimension(1) > 1);
+
+    // The weights go through the reshape function only if the caller has not reshaped
+    // them already and a transpose and/or the batched layout is required
+    const bool reshape_weights = !are_weights_reshaped && (transpose_weights || _is_batched_fc_layer);
+
+    const ITensor *weights_to_use = weights;
+
+    if(reshape_weights)
+    {
+        weights_to_use = &_reshape_weights_output;
+
+        // Number of elements that fit in 16 bytes
+        const float transpose_width = 16.0f / input->info()->element_size();
+
+        if(transpose_weights && _is_batched_fc_layer)
+        {
+            // Transposed and laid out for the batched case
+            const TensorShape reshaped_shape(weights->info()->dimension(0) * static_cast<unsigned int>(transpose_width), static_cast<unsigned int>(std::ceil(weights->info()->dimension(1) / transpose_width)));
+            _reshape_weights_output.allocator()->init(TensorInfo(reshaped_shape, 1, data_type, fpp));
+        }
+        else if(transpose_weights)
+        {
+            // Plain transpose: the two dimensions are swapped
+            const TensorShape reshaped_shape(weights->info()->dimension(1), weights->info()->dimension(0));
+            _reshape_weights_output.allocator()->init(TensorInfo(reshaped_shape, 1, data_type, fpp));
+        }
+        else
+        {
+            ARM_COMPUTE_ERROR_ON(!_is_batched_fc_layer);
+
+            // Batched layout without the preliminary transpose
+            const TensorShape reshaped_shape(weights->info()->dimension(1) * static_cast<unsigned int>(transpose_width), static_cast<unsigned int>(std::ceil(weights->info()->dimension(0) / transpose_width)));
+            _reshape_weights_output.allocator()->init(TensorInfo(reshaped_shape, 1, data_type, fpp));
+        }
+
+        // Reshape the weights
+        _reshape_weights_kernel.configure(weights, &_reshape_weights_output, transpose_weights, _is_batched_fc_layer);
+    }
+
+    if(_is_batched_fc_layer)
+    {
+        // The layer follows a convolution when the input dimensions from index 3 onwards
+        // match the output dimensions from index 1 onwards
+        const TensorShape &in_shape  = input->info()->tensor_shape();
+        const TensorShape &out_shape = output->info()->tensor_shape();
+
+        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(in_shape.cbegin() + 3, in_shape.cend(), out_shape.cbegin() + 1));
+
+        if(_is_fc_after_conv)
+        {
+            // Case 3: after a Convolution Layer, with batches
+            configure_conv_fc_wb(input, weights_to_use, output);
+        }
+        else
+        {
+            // Case 4: after a Fully Connected Layer, with batches
+            configure_fc_fc_wb(input, weights_to_use, output);
+        }
+    }
+    else
+    {
+        // Without batches the weights are not reshaped using transposed1xW, so the layer follows
+        // a convolution when the weights' second dimension equals the linearized input size
+        _is_fc_after_conv = ((weights_to_use->info()->dimension(1)) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)));
+
+        if(_is_fc_after_conv)
+        {
+            // Case 1: after a Convolution Layer, without batches
+            configure_conv_fc_nb(input, weights_to_use, output);
+        }
+        else
+        {
+            // Case 2: after a Fully Connected Layer, without batches
+            configure_fc_fc_nb(input, weights_to_use, output);
+        }
+    }
+
+    // The reshaped weights tensor is allocated last, once all the configure methods have been called
+    if(reshape_weights)
+    {
+        _reshape_weights_output.allocator()->allocate();
+    }
+}
+
+// Executes the chain selected by configure(): one-off weights reshape,
+// optional im2col, optional interleave, matrix multiply and optional bias
+// accumulation.
+void NEFullyConnectedLayer::run()
+{
+    // The weights are reshaped on the first call only; the flag is raised before running
+    if(!_are_weights_reshaped)
+    {
+        _are_weights_reshaped = true;
+        _reshape_weights_kernel.run();
+    }
+
+    auto &scheduler = NEScheduler::get();
+
+    // Input coming from a convolutional layer has to be linearized first
+    if(_is_fc_after_conv)
+    {
+        scheduler.schedule(&_im2col_kernel, Window::DimY);
+    }
+
+    // The batched case additionally interleaves the input
+    if(_is_batched_fc_layer)
+    {
+        scheduler.schedule(&_interleave4x4_kernel, Window::DimY);
+    }
+
+    // Matrix multiply: scheduled along DimY when batched, along DimX otherwise
+    scheduler.schedule(&_mm_kernel, _is_batched_fc_layer ? Window::DimY : Window::DimX);
+
+    // Biases are accumulated only if they were provided at configure time
+    if(_accumulate_biases)
+    {
+        scheduler.schedule(&_accumulate_biases_kernel, Window::DimY);
+    }
+}
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
new file mode 100644
index 0000000000..15d5f4effb
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <cmath>
+
+using namespace arm_compute;
+
+// Value-initialises all kernels and temporary tensors; both execution flags start out as false.
+NEGEMM::NEGEMM()
+    : _interleave_kernel(),
+      _transpose_kernel(),
+      _mm_kernel(),
+      _ma_kernel(),
+      _tmp_a(),
+      _tmp_b(),
+      _run_vector_matrix_multiplication(false),
+      _run_addition(false)
+{
+}
+
+/** Validates the tensors and configures the kernels executed by run().
+ *
+ * Dimension 0 of a tensor is treated as its number of columns and dimension 1 as its number of rows.
+ *
+ * @param[in]  a     First input matrix. Data types accepted: F32/F16/QS8.
+ * @param[in]  b     Second input matrix, same data type as @p a. It needs as many rows as @p a has columns.
+ * @param[in]  c     Optional matrix handed to the matrix addition kernel (may be nullptr).
+ *                   When given it must have the data type of @p a and the width and height of @p d.
+ * @param[out] d     Output matrix, same data type as @p a.
+ * @param[in]  alpha Scalar forwarded to the matrix multiply kernel.
+ * @param[in]  beta  Scalar forwarded to the matrix addition kernel. The addition is skipped when it is 0 or @p c is nullptr.
+ */
+void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16, DataType::QS8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::F32, DataType::F16, DataType::QS8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(d, 1, DataType::F32, DataType::F16, DataType::QS8);
+
+    if(c != nullptr)
+    {
+        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(c, 1, DataType::F32, DataType::F16, DataType::QS8);
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c);
+        ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A");
+        ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix B");
+        // Dimension 0 is the number of columns and dimension 1 the number of rows
+        ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(0) != d->info()->dimension(0), "The C matrix must have the same number of columns as the output matrix");
+        ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != d->info()->dimension(1), "The C matrix must have the same number of rows as the output matrix");
+    }
+
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, d);
+    ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
+
+    // Check if the first input tensor is a vector. If so, all the kernels for reshaping the tensors can be skipped
+    if(a->info()->dimension(1) == 1)
+    {
+        _run_vector_matrix_multiplication = true;
+
+        // Configure the matrix multiply kernel directly on the user tensors
+        _mm_kernel.configure(a, b, d, alpha);
+    }
+    else
+    {
+        _run_vector_matrix_multiplication = false;
+
+        // Number of elements of B grouped together by the transposition; it depends on the data type
+        size_t transpose_w = 0;
+
+        switch(a->info()->data_type())
+        {
+            case DataType::F32:
+            {
+                transpose_w = 4;
+                break;
+            }
+#ifdef ARM_COMPUTE_ENABLE_FP16
+            // Without FP16 support this label is compiled out, so F16 reaches the default error
+            // instead of silently falling through to the QS8 case
+            case DataType::F16:
+            {
+                transpose_w = 8;
+                break;
+            }
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+            case DataType::QS8:
+            {
+                transpose_w = 16;
+                break;
+            }
+            default:
+            {
+                ARM_COMPUTE_ERROR("Data type not supported");
+            }
+        }
+
+        // Interleaved A: [ a_width * 4, ceil(a_height / 4) ], using an exact integer ceiling division
+        TensorShape shape_tmp_a = a->info()->tensor_shape();
+        shape_tmp_a.set(0, a->info()->dimension(0) * 4);
+        shape_tmp_a.set(1, (a->info()->dimension(1) + 3) / 4);
+
+        // Transposed B: [ b_height * transpose_w, ceil(b_width / transpose_w) ]
+        TensorShape shape_tmp_b = b->info()->tensor_shape();
+        shape_tmp_b.set(0, b->info()->dimension(1) * transpose_w);
+        shape_tmp_b.set(1, (b->info()->dimension(0) + transpose_w - 1) / transpose_w);
+
+        // Each temporary tensor inherits the data type and fixed point position of the tensor it is derived from
+        TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position());
+        TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position());
+
+        _tmp_a.allocator()->init(info_a);
+        _tmp_b.allocator()->init(info_b);
+
+        // Configure interleave kernel
+        _interleave_kernel.configure(a, &_tmp_a);
+
+        // Configure transpose kernel
+        _transpose_kernel.configure(b, &_tmp_b);
+
+        // Configure matrix multiplication kernel
+        _mm_kernel.configure(&_tmp_a, &_tmp_b, d, alpha);
+
+        // Allocate once all the configure methods have been called
+        _tmp_a.allocator()->allocate();
+        _tmp_b.allocator()->allocate();
+    }
+
+    // Configure matrix addition kernel. The flag is always rewritten so that a later
+    // call to configure() cannot leave a stale addition enabled
+    _run_addition = (beta != 0 && c != nullptr);
+    if(_run_addition)
+    {
+        _ma_kernel.configure(c, d, beta);
+    }
+}
+
+// Schedules the kernels prepared by configure(): the reshapes (unless the vector path was
+// selected), the matrix multiplication and, when enabled, the matrix addition.
+void NEGEMM::run()
+{
+    auto &scheduler = NEScheduler::get();
+
+    if(_run_vector_matrix_multiplication)
+    {
+        // Vector by matrix: no reshaping, the multiplication is split along X
+        scheduler.schedule(&_mm_kernel, Window::DimX);
+    }
+    else
+    {
+        // Reshape both operands first: interleave, then transpose
+        scheduler.schedule(&_interleave_kernel, Window::DimY);
+        scheduler.schedule(&_transpose_kernel, Window::DimY);
+
+        // Matrix by matrix multiplication is split along Y
+        scheduler.schedule(&_mm_kernel, Window::DimY);
+    }
+
+    // The addition kernel runs only if configure() enabled it
+    if(_run_addition)
+    {
+        scheduler.schedule(&_ma_kernel, Window::DimY);
+    }
+}
diff --git a/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp
new file mode 100644
index 0000000000..4c77c88656
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+
+using namespace arm_compute;
+
+// Creates a NEGEMMInterleave4x4Kernel, configures it with the given tensors and stores it in _kernel.
+void NEGEMMInterleave4x4::configure(const ITensor *input, ITensor *output)
+{
+    auto interleave_kernel = arm_compute::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
+
+    interleave_kernel->configure(input, output);
+
+    // Ownership of the configured kernel moves to the member
+    _kernel = std::move(interleave_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEGEMMLowp.cpp b/src/runtime/NEON/functions/NEGEMMLowp.cpp
new file mode 100644
index 0000000000..b64f769459
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGEMMLowp.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+using namespace arm_compute;
+
+// Value-initialises the three kernels and the two temporary tensors.
+NEGEMMLowp::NEGEMMLowp()
+    : _interleave_kernel(),
+      _transpose_kernel(),
+      _mm_kernel(),
+      _tmp_a(),
+      _tmp_b()
+{
+}
+
+/** Validates the tensors and configures the interleave, transpose and matrix multiply kernels.
+ *
+ * Dimension 0 of a tensor is treated as its number of columns and dimension 1 as its number of rows.
+ *
+ * @param[in]  a               First input matrix. Data type accepted: U8.
+ * @param[in]  b               Second input matrix. Data type accepted: U8. It needs as many rows as @p a has columns.
+ * @param[out] output          Output matrix. Data type accepted: U8. It needs the rows of @p a and the columns of @p b.
+ * @param[in]  a_offset        Forwarded unchanged to the matrix multiply kernel.
+ * @param[in]  b_offset        Forwarded unchanged to the matrix multiply kernel.
+ * @param[in]  output_offset   Forwarded unchanged to the matrix multiply kernel.
+ * @param[in]  output_mult_int Forwarded unchanged to the matrix multiply kernel.
+ * @param[in]  shift           Forwarded unchanged to the matrix multiply kernel.
+ */
+void NEGEMMLowp::configure(const ITensor *a, const ITensor *b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output);
+    ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
+    ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != output->info()->dimension(1), "The output matrix must have the same number of rows as the matrix A");
+    ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != output->info()->dimension(0), "The output matrix must have the same number of columns as the matrix B");
+
+    /* The interleaved matrix A will have the following shape: [ a_width * 4, ceil(a_height / 4) ] */
+    TensorShape shape_tmp_a = a->info()->tensor_shape();
+    shape_tmp_a.set(0, a->info()->dimension(0) * 4);
+    shape_tmp_a.set(1, (a->info()->dimension(1) + 3) / 4);
+
+    /* The transposed matrix B will have the following shape: [ b_height * 16, ceil(b_width / 16) ] */
+    TensorShape shape_tmp_b = b->info()->tensor_shape();
+    shape_tmp_b.set(0, b->info()->dimension(1) * 16);
+    shape_tmp_b.set(1, (b->info()->dimension(0) + 15) / 16);
+
+    TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type());
+    TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type());
+    _tmp_a.allocator()->init(info_a);
+    _tmp_b.allocator()->init(info_b);
+
+    _interleave_kernel.configure(a, &_tmp_a);
+    _transpose_kernel.configure(b, &_tmp_b);
+    _mm_kernel.configure(&_tmp_a, &_tmp_b, output, a_offset, b_offset, output_offset, output_mult_int, shift);
+
+    /* Allocate the temporary tensors only after every kernel has been configured */
+    _tmp_a.allocator()->allocate();
+    _tmp_b.allocator()->allocate();
+}
+
+/* Schedules the interleave, transpose and matrix multiply kernels in that order, each split along Y. */
+void NEGEMMLowp::run()
+{
+    auto &scheduler = NEScheduler::get();
+
+    /* Reshape the operands: interleave first, then transpose */
+    scheduler.schedule(&_interleave_kernel, Window::DimY);
+    scheduler.schedule(&_transpose_kernel, Window::DimY);
+
+    /* Multiply the reshaped matrices */
+    scheduler.schedule(&_mm_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
new file mode 100644
index 0000000000..dc40ecec14
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+
+using namespace arm_compute;
+
+// Creates a NEGEMMTranspose1xWKernel, configures it with the given tensors and stores it in _kernel.
+void NEGEMMTranspose1xW::configure(const ITensor *input, ITensor *output)
+{
+    auto transpose_kernel = arm_compute::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
+
+    transpose_kernel->configure(input, output);
+
+    // Ownership of the configured kernel moves to the member
+    _kernel = std::move(transpose_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEGaussian3x3.cpp b/src/runtime/NEON/functions/NEGaussian3x3.cpp
new file mode 100644
index 0000000000..95ba5cbdf9
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGaussian3x3.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Creates and configures the Gaussian 3x3 kernel, stores it in _kernel and then sets up the
+// border handler using the border size reported by the stored kernel.
+void NEGaussian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // The kernel is only told whether the border mode is UNDEFINED
+    const bool border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    auto gaussian_kernel = arm_compute::cpp14::make_unique<NEGaussian3x3Kernel>();
+    gaussian_kernel->configure(input, output, border_undefined);
+    _kernel = std::move(gaussian_kernel);
+
+    // The border size is queried through _kernel, i.e. after ownership has been transferred
+    const PixelValue border_value(constant_border_value);
+    _border_handler.configure(input, _kernel->border_size(), border_mode, border_value);
+}
diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp
new file mode 100644
index 0000000000..5ccc765966
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+using namespace arm_compute;
+
+// Value-initialises the two pass kernels, the intermediate tensor and the border handler.
+NEGaussian5x5::NEGaussian5x5()
+    : _kernel_hor(),
+      _kernel_vert(),
+      _tmp(),
+      _border_handler()
+{
+}
+
+// Sets up the two-pass filter: input -> _tmp (horizontal kernel) -> output (vertical kernel),
+// plus the border handler for the input.
+void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    const bool border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    // The intermediate buffer has the shape of the input and S16 format
+    TensorInfo tmp_info(input->info()->tensor_shape(), Format::S16);
+    _tmp.allocator()->init(tmp_info);
+
+    // Horizontal pass writes into the intermediate buffer, vertical pass reads it back
+    _kernel_hor.configure(input, &_tmp, border_undefined);
+    _kernel_vert.configure(&_tmp, output, border_undefined);
+
+    // Allocate the intermediate buffer only after both kernels have been configured
+    _tmp.allocator()->allocate();
+
+    // The border handler uses the border size reported by the horizontal kernel
+    const PixelValue border_value(constant_border_value);
+    _border_handler.configure(input, _kernel_hor.border_size(), border_mode, border_value);
+}
+
+// Runs the border handler over its own window, then schedules the horizontal and vertical passes.
+void NEGaussian5x5::run()
+{
+    // The border handler is run directly rather than through the scheduler
+    _border_handler.run(_border_handler.window());
+
+    auto &scheduler = NEScheduler::get();
+    scheduler.schedule(&_kernel_hor, Window::DimY);
+    scheduler.schedule(&_kernel_vert, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
new file mode 100644
index 0000000000..e1d64f11f6
--- /dev/null
+++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
+#include "arm_compute/runtime/Pyramid.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <cstddef>
+
+using namespace arm_compute;
+
+// Starts unconfigured: no input, no output pyramid and an empty temporary pyramid.
+NEGaussianPyramid::NEGaussianPyramid()
+    : _input(nullptr),
+      _pyramid(nullptr),
+      _tmp()
+{
+}
+
+// Value-initialises the per-level kernel arrays; they are created in configure().
+NEGaussianPyramidHalf::NEGaussianPyramidHalf()
+    : _border_handler(),
+      _horizontal_reduction(),
+      _vertical_reduction()
+{
+}
+
+// Checks the input against the pyramid and, for pyramids with more than one level, creates and
+// configures one border handler, one horizontal and one vertical reduction kernel per level transition.
+void NEGaussianPyramidHalf::configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
+    ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width());
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
+    ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_HALF != pyramid->info()->scale());
+
+    const size_t num_levels = pyramid->info()->num_levels();
+
+    _input   = input;
+    _pyramid = pyramid;
+
+    /* With a single level there is no reduction to configure */
+    if(num_levels <= 1)
+    {
+        return;
+    }
+
+    /* One set of kernels per transition between consecutive levels */
+    const size_t num_reductions   = num_levels - 1;
+    const bool   border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    _border_handler       = arm_compute::cpp14::make_unique<NEFillBorderKernel[]>(num_reductions);
+    _horizontal_reduction = arm_compute::cpp14::make_unique<NEGaussianPyramidHorKernel[]>(num_reductions);
+    _vertical_reduction   = arm_compute::cpp14::make_unique<NEGaussianPyramidVertKernel[]>(num_reductions);
+
+    /* The temporary pyramid takes the pyramid shape with the half scale applied to the X dimension */
+    TensorShape tmp_shape = pyramid->info()->tensor_shape();
+    tmp_shape.set(0, (pyramid->info()->width() + 1) * SCALE_PYRAMID_HALF);
+
+    PyramidInfo tmp_info(num_reductions, SCALE_PYRAMID_HALF, tmp_shape, Format::S16);
+    _tmp.init(tmp_info);
+
+    for(unsigned int level = 0; level < num_reductions; ++level)
+    {
+        /* Horizontal kernel: pyramid level -> temporary level */
+        _horizontal_reduction[level].configure(_pyramid->get_pyramid_level(level), _tmp.get_pyramid_level(level), border_undefined);
+
+        /* Vertical kernel: temporary level -> next pyramid level */
+        _vertical_reduction[level].configure(_tmp.get_pyramid_level(level), _pyramid->get_pyramid_level(level + 1), border_undefined);
+
+        /* Border of the source level, sized by the horizontal kernel */
+        _border_handler[level].configure(_pyramid->get_pyramid_level(level), _horizontal_reduction[level].border_size(), border_mode, PixelValue(constant_border_value));
+    }
+
+    /* Allocate the temporary pyramid once every kernel has been configured */
+    _tmp.allocate();
+}
+
+// Copies the input into level 0 and then, for every level transition, runs the border handler
+// and schedules the horizontal and vertical reduction kernels.
+void NEGaussianPyramidHalf::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function");
+
+    const size_t num_levels     = _pyramid->info()->num_levels();
+    const size_t num_reductions = num_levels - 1;
+
+    /* Level 0 of the pyramid is a copy of the input image */
+    _pyramid->get_pyramid_level(0)->copy_from(*_input);
+
+    auto &scheduler = NEScheduler::get();
+
+    for(unsigned int level = 0; level < num_reductions; ++level)
+    {
+        /* The border handler is run directly over its own window */
+        _border_handler[level].run(_border_handler[level].window());
+
+        scheduler.schedule(_horizontal_reduction.get() + level, Window::DimY);
+        scheduler.schedule(_vertical_reduction.get() + level, Window::DimY);
+    }
+}
+
+// Value-initialises the per-level arrays; they are created in configure().
+NEGaussianPyramidOrb::NEGaussianPyramidOrb()
+    : _offsets(),
+      _gaus5x5(),
+      _scale_nearest()
+{
+}
+
+// Checks the input against the pyramid and, for pyramids with more than one level, creates and
+// configures one Gaussian 5x5 function, one scale kernel and one offsets image per level transition.
+void NEGaussianPyramidOrb::configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
+    ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width());
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
+    ARM_COMPUTE_ERROR_ON(SCALE_PYRAMID_ORB != pyramid->info()->scale());
+
+    const size_t num_levels = pyramid->info()->num_levels();
+
+    _input   = input;
+    _pyramid = pyramid;
+
+    /* With a single level there is nothing to filter or scale */
+    if(num_levels <= 1)
+    {
+        return;
+    }
+
+    /* One set of objects per transition between consecutive levels */
+    const size_t num_transitions  = num_levels - 1;
+    const bool   border_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    _gaus5x5       = arm_compute::cpp14::make_unique<NEGaussian5x5[]>(num_transitions);
+    _scale_nearest = arm_compute::cpp14::make_unique<NEScaleKernel[]>(num_transitions);
+    _offsets       = arm_compute::cpp14::make_unique<Image[]>(num_transitions);
+
+    /* The temporary pyramid receives the Gaussian filter outputs */
+    PyramidInfo tmp_info(num_transitions, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8);
+    _tmp.init(tmp_info);
+
+    for(unsigned int level = 0; level < num_transitions; ++level)
+    {
+        /* The offsets image has the size of the destination level */
+        const size_t dst_width  = _pyramid->get_pyramid_level(level + 1)->info()->dimension(0);
+        const size_t dst_height = _pyramid->get_pyramid_level(level + 1)->info()->dimension(1);
+
+        TensorInfo offsets_info(TensorShape(dst_width, dst_height), Format::S32);
+        _offsets[level].allocator()->init(offsets_info);
+
+        /* Gaussian 5x5: pyramid level -> temporary level */
+        _gaus5x5[level].configure(_pyramid->get_pyramid_level(level), _tmp.get_pyramid_level(level), border_mode, constant_border_value);
+
+        /* Nearest neighbour scale: temporary level -> next pyramid level */
+        _scale_nearest[level].configure(_tmp.get_pyramid_level(level), nullptr, nullptr, _offsets.get() + level, _pyramid->get_pyramid_level(level + 1), InterpolationPolicy::NEAREST_NEIGHBOR,
+                                        border_undefined);
+
+        /* Allocate the offsets image after the scale kernel has been configured */
+        _offsets[level].allocator()->allocate();
+    }
+
+    /* Allocate the temporary pyramid once every level has been configured */
+    _tmp.allocate();
+}
+
+// Copies the input into level 0 and then, for every level transition, runs the Gaussian 5x5
+// function followed by the scale kernel.
+void NEGaussianPyramidOrb::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_pyramid == nullptr, "Unconfigured function");
+
+    const size_t num_levels      = _pyramid->info()->num_levels();
+    const size_t num_transitions = num_levels - 1;
+
+    /* Level 0 of the pyramid is a copy of the input image */
+    _pyramid->get_pyramid_level(0)->copy_from(*_input);
+
+    for(unsigned int level = 0; level < num_transitions; ++level)
+    {
+        /* Filter first, then scale the filtered image into the next level */
+        _gaus5x5[level].run();
+        NEScheduler::get().schedule(_scale_nearest.get() + level, Window::DimY);
+    }
+}
diff --git a/src/runtime/NEON/functions/NEHOGDescriptor.cpp b/src/runtime/NEON/functions/NEHOGDescriptor.cpp
new file mode 100644
index 0000000000..a592f53d44
--- /dev/null
+++ b/src/runtime/NEON/functions/NEHOGDescriptor.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Value-initialises the gradient function, the two kernels and the three intermediate tensors.
+NEHOGDescriptor::NEHOGDescriptor()
+    : _gradient(),
+      _orient_bin(),
+      _block_norm(),
+      _mag(),
+      _phase(),
+      _hog_space()
+{
+}
+
+// Initialises the magnitude, phase and HOG space tensors, configures the gradient function and
+// the orientation binning and block normalization kernels, then allocates the intermediate tensors.
+void NEHOGDescriptor::configure(ITensor *input, ITensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+    ARM_COMPUTE_ERROR_ON(nullptr == hog);
+
+    const HOGInfo *hog_info = hog->info();
+
+    // Image size and HOG parameters
+    const size_t img_width  = input->info()->dimension(Window::DimX);
+    const size_t img_height = input->info()->dimension(Window::DimY);
+    const size_t num_bins   = hog_info->num_bins();
+    Size2D       cell_size  = hog_info->cell_size();
+
+    // Magnitude and phase share the shape of the input image
+    const TensorShape &img_shape = input->info()->tensor_shape();
+
+    // The HOG space has one element per whole cell along X and Y
+    TensorShape hog_space_shape = input->info()->tensor_shape();
+    hog_space_shape.set(Window::DimX, img_width / cell_size.width);
+    hog_space_shape.set(Window::DimY, img_height / cell_size.height);
+
+    // Describe the intermediate tensors: S16 magnitude, U8 phase, F32 HOG space with num_bins channels
+    TensorInfo mag_info(img_shape, Format::S16);
+    TensorInfo phase_info(img_shape, Format::U8);
+    TensorInfo hog_space_info(hog_space_shape, num_bins, DataType::F32);
+
+    _mag.allocator()->init(mag_info);
+    _phase.allocator()->init(phase_info);
+    _hog_space.allocator()->init(hog_space_info);
+
+    // Gradient: input -> magnitude and phase
+    _gradient.configure(input, &_mag, &_phase, hog_info->phase_type(), border_mode, constant_border_value);
+
+    // Orientation binning: magnitude and phase -> HOG space
+    _orient_bin.configure(&_mag, &_phase, &_hog_space, hog_info);
+
+    // Block normalization: HOG space -> output
+    _block_norm.configure(&_hog_space, output, hog_info);
+
+    // Allocate the intermediate tensors once every stage has been configured
+    _mag.allocator()->allocate();
+    _phase.allocator()->allocate();
+    _hog_space.allocator()->allocate();
+}
+
+// Runs the gradient function, then schedules the orientation binning and block normalization kernels.
+void NEHOGDescriptor::run()
+{
+    // Gradient stage is a function and is run directly
+    _gradient.run();
+
+    auto &scheduler = NEScheduler::get();
+
+    // Orientation binning followed by block normalization, both split along Y
+    scheduler.schedule(&_orient_bin, Window::DimY);
+    scheduler.schedule(&_block_norm, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEHOGDetector.cpp b/src/runtime/NEON/functions/NEHOGDetector.cpp
new file mode 100644
index 0000000000..e8ed29d0b9
--- /dev/null
+++ b/src/runtime/NEON/functions/NEHOGDetector.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
+
+using namespace arm_compute;
+
+// Creates a NEHOGDetectorKernel, forwards every argument to its configure() and stores it in _kernel.
+void NEHOGDetector::configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
+{
+    auto detector_kernel = arm_compute::cpp14::make_unique<NEHOGDetectorKernel>();
+
+    detector_kernel->configure(input, hog, detection_windows, detection_window_stride, threshold, idx_class);
+
+    // Ownership of the configured kernel moves to the member
+    _kernel = std::move(detector_kernel);
+} \ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEHOGGradient.cpp b/src/runtime/NEON/functions/NEHOGGradient.cpp
new file mode 100644
index 0000000000..2f4b8802e3
--- /dev/null
+++ b/src/runtime/NEON/functions/NEHOGGradient.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor.
+ *
+ * The magnitude/phase kernel pointer starts out null; it is only set by configure().
+ */
+NEHOGGradient::NEHOGGradient()
+    : _derivative(),
+      _mag_phase(nullptr),
+      _gx(),
+      _gy()
+{
+}
+
+/** Configure the derivative function and the magnitude/phase kernel.
+ *
+ * The derivative function writes the two intermediate gradient tensors (_gx, _gy), which are then used as
+ * inputs of an L2-norm magnitude/phase kernel writing to @p output_magnitude and @p output_phase.
+ *
+ * @param[in]  input                 Source tensor. Checked to be single channel U8.
+ * @param[out] output_magnitude      Magnitude tensor. Checked to be single channel S16.
+ * @param[out] output_phase          Phase tensor. Checked to be single channel U8.
+ * @param[in]  phase_type            PhaseType::UNSIGNED selects the unsigned kernel, anything else the signed one.
+ * @param[in]  border_mode           Border mode forwarded to the derivative function.
+ * @param[in]  constant_border_value Constant border value forwarded to the derivative function.
+ */
+void NEHOGGradient::configure(ITensor *input, ITensor *output_magnitude, ITensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_magnitude, 1, DataType::S16);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_phase, 1, DataType::U8);
+
+    // Both gradient tensors take the shape of the input and use the S16 format
+    TensorInfo gradient_info(input->info()->tensor_shape(), Format::S16);
+    _gx.allocator()->init(gradient_info);
+    _gy.allocator()->init(gradient_info);
+
+    // Initialise derivative function
+    _derivative.configure(input, &_gx, &_gy, border_mode, constant_border_value);
+
+    // Initialise magnitude/phase kernel: the phase type is a template argument, hence the two branches
+    if(PhaseType::UNSIGNED != phase_type)
+    {
+        auto signed_kernel = arm_compute::cpp14::make_unique<NEMagnitudePhaseKernel<MagnitudeType::L2NORM, PhaseType::SIGNED>>();
+        signed_kernel->configure(&_gx, &_gy, output_magnitude, output_phase);
+        _mag_phase = std::move(signed_kernel);
+    }
+    else
+    {
+        auto unsigned_kernel = arm_compute::cpp14::make_unique<NEMagnitudePhaseKernel<MagnitudeType::L2NORM, PhaseType::UNSIGNED>>();
+        unsigned_kernel->configure(&_gx, &_gy, output_magnitude, output_phase);
+        _mag_phase = std::move(unsigned_kernel);
+    }
+
+    // Allocate the intermediate tensors only after every configure call above has been made
+    _gx.allocator()->allocate();
+    _gy.allocator()->allocate();
+}
+
+/** Run the derivative function, then schedule the magnitude/phase kernel along Window::DimY.
+ *
+ * Asserts that configure() has been called: the magnitude/phase kernel pointer is null until then.
+ */
+void NEHOGGradient::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_mag_phase == nullptr, "Unconfigured function");
+
+    // Run derivative
+    _derivative.run();
+
+    // Run magnitude/phase kernel
+    NEScheduler::get().schedule(_mag_phase.get(), Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
new file mode 100644
index 0000000000..173b8f4c42
--- /dev/null
+++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/Tensor.h"
+
+using namespace arm_compute;
+
+/** Default constructor.
+ *
+ * Value-initialises every member, disables non-maxima suppression and sets all kernel counters to zero.
+ * The detection window array pointer is null until configure() is called.
+ */
+NEHOGMultiDetection::NEHOGMultiDetection()
+    : _gradient_kernel(),
+      _orient_bin_kernel(),
+      _block_norm_kernel(),
+      _hog_detect_kernel(),
+      _non_maxima_kernel(),
+      _hog_space(),
+      _hog_norm_space(),
+      _detection_windows(),
+      _mag(),
+      _phase(),
+      _non_maxima_suppression(false),
+      _num_orient_bin_kernel(0),
+      _num_block_norm_kernel(0),
+      _num_hog_detect_kernel(0)
+{
+}
+
+/** Configure the multi-model HOG detection pipeline.
+ *
+ * Sets up, in this order: the gradient function, the orientation binning kernels, the block normalization
+ * kernels, one HOG detector per model and the non-maxima suppression kernel. Orientation binning and block
+ * normalization kernels are shared between consecutive models whose relevant HOG parameters are equal.
+ * All intermediate tensors are allocated at the end, once every configure call has been made.
+ *
+ * @param[in]  input                    Source tensor. Checked to be single channel U8.
+ * @param[in]  multi_hog                HOG models. Only consecutive models are compared when deciding which
+ *                                      kernels can be shared, so models are expected to be sorted accordingly.
+ *                                      The phase type is taken from model 0.
+ * @param[out] detection_windows        Array receiving the detection windows. Must not be nullptr.
+ * @param[in]  detection_window_strides Detection window stride for each model. Must not be nullptr and must hold
+ *                                      as many values as there are models.
+ * @param[in]  border_mode              Border mode forwarded to the gradient function.
+ * @param[in]  constant_border_value    Constant border value forwarded to the gradient function.
+ * @param[in]  threshold                Threshold forwarded to every HOG detector.
+ * @param[in]  non_maxima_suppression   If true, run() executes the non-maxima suppression kernel.
+ * @param[in]  min_distance             Minimum distance forwarded to the non-maxima suppression kernel.
+ */
+void NEHOGMultiDetection::configure(ITensor *input, const IMultiHOG *multi_hog, IDetectionWindowArray *detection_windows, const ISize2DArray *detection_window_strides, BorderMode border_mode,
+                                    uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(multi_hog);
+    ARM_COMPUTE_ERROR_ON(nullptr == detection_windows);
+    // The strides array is dereferenced right below and again when configuring the detectors
+    ARM_COMPUTE_ERROR_ON(nullptr == detection_window_strides);
+    ARM_COMPUTE_ERROR_ON(detection_window_strides->num_values() != multi_hog->num_models());
+
+    const size_t       width      = input->info()->dimension(Window::DimX);
+    const size_t       height     = input->info()->dimension(Window::DimY);
+    const TensorShape &shape_img  = input->info()->tensor_shape();
+    const size_t       num_models = multi_hog->num_models();
+    PhaseType          phase_type = multi_hog->model(0)->info()->phase_type();
+
+    size_t prev_num_bins     = multi_hog->model(0)->info()->num_bins();
+    Size2D prev_cell_size    = multi_hog->model(0)->info()->cell_size();
+    Size2D prev_block_size   = multi_hog->model(0)->info()->block_size();
+    Size2D prev_block_stride = multi_hog->model(0)->info()->block_stride();
+
+    /* Check if NEHOGOrientationBinningKernel and NEHOGBlockNormalizationKernel kernels can be skipped for a specific HOG data-object
+     *
+     * 1) NEHOGOrientationBinningKernel and NEHOGBlockNormalizationKernel are skipped if the cell size and the number of bins don't change.
+     *        Since "multi_hog" is sorted, it is enough to check the HOG descriptors at level "ith" and level "(i-1)th"
+     * 2) NEHOGBlockNormalizationKernel is skipped if the cell size, the number of bins and block size do not change.
+     *         Since "multi_hog" is sorted, it is enough to check the HOG descriptors at level "ith" and level "(i-1)th"
+     *
+     * @note Since the orientation binning and block normalization kernels can be skipped, we need to keep track of the input to process for each kernel
+     *       with "input_orient_bin", "input_hog_detect" and "input_block_norm"
+     */
+    std::vector<size_t> input_orient_bin;                    // Model index used to configure each orientation binning kernel
+    std::vector<size_t> input_hog_detect;                    // Block normalization index feeding each HOG detector
+    std::vector<std::pair<size_t, size_t>> input_block_norm; // (model index, orientation binning index) for each block normalization kernel
+
+    // Model 0 always gets its own orientation binning and block normalization kernels
+    input_orient_bin.push_back(0);
+    input_hog_detect.push_back(0);
+    input_block_norm.emplace_back(0, 0);
+
+    for(size_t i = 1; i < num_models; ++i)
+    {
+        size_t cur_num_bins     = multi_hog->model(i)->info()->num_bins();
+        Size2D cur_cell_size    = multi_hog->model(i)->info()->cell_size();
+        Size2D cur_block_size   = multi_hog->model(i)->info()->block_size();
+        Size2D cur_block_stride = multi_hog->model(i)->info()->block_stride();
+
+        if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height))
+        {
+            prev_num_bins     = cur_num_bins;
+            prev_cell_size    = cur_cell_size;
+            prev_block_size   = cur_block_size;
+            prev_block_stride = cur_block_stride;
+
+            // Compute orientation binning and block normalization kernels. Update input to process
+            input_orient_bin.push_back(i);
+            input_block_norm.emplace_back(i, input_orient_bin.size() - 1);
+        }
+        else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width)
+                || (cur_block_stride.height != prev_block_stride.height))
+        {
+            prev_block_size   = cur_block_size;
+            prev_block_stride = cur_block_stride;
+
+            // Compute block normalization kernel. Update input to process
+            input_block_norm.emplace_back(i, input_orient_bin.size() - 1);
+        }
+
+        // Update input to process for hog detector kernel
+        input_hog_detect.push_back(input_block_norm.size() - 1);
+    }
+
+    _detection_windows      = detection_windows;
+    _non_maxima_suppression = non_maxima_suppression;
+    _num_orient_bin_kernel  = input_orient_bin.size(); // Number of NEHOGOrientationBinningKernel kernels to compute
+    _num_block_norm_kernel  = input_block_norm.size(); // Number of NEHOGBlockNormalizationKernel kernels to compute
+    _num_hog_detect_kernel  = input_hog_detect.size(); // Number of NEHOGDetector functions to compute
+
+    _orient_bin_kernel = arm_compute::cpp14::make_unique<NEHOGOrientationBinningKernel[]>(_num_orient_bin_kernel);
+    _block_norm_kernel = arm_compute::cpp14::make_unique<NEHOGBlockNormalizationKernel[]>(_num_block_norm_kernel);
+    _hog_detect_kernel = arm_compute::cpp14::make_unique<NEHOGDetector[]>(_num_hog_detect_kernel);
+    _non_maxima_kernel = arm_compute::cpp14::make_unique<CPPDetectionWindowNonMaximaSuppressionKernel>();
+    _hog_space         = arm_compute::cpp14::make_unique<Tensor[]>(_num_orient_bin_kernel);
+    _hog_norm_space    = arm_compute::cpp14::make_unique<Tensor[]>(_num_block_norm_kernel);
+
+    // Allocate tensors for magnitude and phase
+    TensorInfo info_mag(shape_img, Format::S16);
+    _mag.allocator()->init(info_mag);
+
+    TensorInfo info_phase(shape_img, Format::U8);
+    _phase.allocator()->init(info_phase);
+
+    // Initialise gradient kernel
+    _gradient_kernel.configure(input, &_mag, &_phase, phase_type, border_mode, constant_border_value);
+
+    // Configure NETensor for the HOG space and orientation binning kernel
+    for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
+    {
+        const size_t idx_multi_hog = input_orient_bin[i];
+
+        // Get the corresponding cell size and number of bins
+        const Size2D &cell     = multi_hog->model(idx_multi_hog)->info()->cell_size();
+        const size_t  num_bins = multi_hog->model(idx_multi_hog)->info()->num_bins();
+
+        // Calculate number of cells along the x and y directions for the hog_space
+        const size_t num_cells_x = width / cell.width;
+        const size_t num_cells_y = height / cell.height;
+
+        // TensorShape of hog space
+        TensorShape shape_hog_space = input->info()->tensor_shape();
+        shape_hog_space.set(Window::DimX, num_cells_x);
+        shape_hog_space.set(Window::DimY, num_cells_y);
+
+        // Allocate HOG space
+        TensorInfo info_space(shape_hog_space, num_bins, DataType::F32);
+        _hog_space[i].allocator()->init(info_space);
+
+        // Initialise orientation binning kernel
+        _orient_bin_kernel[i].configure(&_mag, &_phase, _hog_space.get() + i, multi_hog->model(idx_multi_hog)->info());
+    }
+
+    // Configure NETensor for the normalized HOG space and block normalization kernel
+    for(size_t i = 0; i < _num_block_norm_kernel; ++i)
+    {
+        const size_t idx_multi_hog  = input_block_norm[i].first;
+        const size_t idx_orient_bin = input_block_norm[i].second;
+
+        // Allocate normalized HOG space
+        TensorInfo tensor_info(*(multi_hog->model(idx_multi_hog)->info()), width, height);
+        _hog_norm_space[i].allocator()->init(tensor_info);
+
+        // Initialize block normalization kernel
+        _block_norm_kernel[i].configure(_hog_space.get() + idx_orient_bin, _hog_norm_space.get() + i, multi_hog->model(idx_multi_hog)->info());
+    }
+
+    // Configure HOG detector kernel
+    for(size_t i = 0; i < _num_hog_detect_kernel; ++i)
+    {
+        const size_t idx_block_norm = input_hog_detect[i];
+
+        // The detector index doubles as model index, stride index and class index
+        _hog_detect_kernel[i].configure(_hog_norm_space.get() + idx_block_norm, multi_hog->model(i), detection_windows, detection_window_strides->at(i), threshold, i);
+    }
+
+    // Configure non maxima suppression kernel
+    _non_maxima_kernel->configure(_detection_windows, min_distance);
+
+    // Allocate intermediate tensors
+    _mag.allocator()->allocate();
+    _phase.allocator()->allocate();
+
+    for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
+    {
+        _hog_space[i].allocator()->allocate();
+    }
+
+    for(size_t i = 0; i < _num_block_norm_kernel; ++i)
+    {
+        _hog_norm_space[i].allocator()->allocate();
+    }
+}
+
+/** Execute the configured pipeline once.
+ *
+ * Clears the detection window array, then runs the gradient function, every orientation binning kernel,
+ * every block normalization kernel and every HOG detector, in that order. The non-maxima suppression kernel
+ * is run last, and only if it was enabled in configure().
+ */
+void NEHOGMultiDetection::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_detection_windows == nullptr, "Unconfigured function");
+
+    // Start from an empty list of detection windows
+    _detection_windows->clear();
+
+    // Gradient (magnitude and phase)
+    _gradient_kernel.run();
+
+    // Orientation binning
+    for(size_t bin_idx = 0; bin_idx != _num_orient_bin_kernel; ++bin_idx)
+    {
+        NEScheduler::get().schedule(&_orient_bin_kernel[bin_idx], Window::DimY);
+    }
+
+    // Block normalization
+    for(size_t norm_idx = 0; norm_idx != _num_block_norm_kernel; ++norm_idx)
+    {
+        NEScheduler::get().schedule(&_block_norm_kernel[norm_idx], Window::DimY);
+    }
+
+    // HOG detection, one detector per model
+    for(size_t detect_idx = 0; detect_idx != _num_hog_detect_kernel; ++detect_idx)
+    {
+        _hog_detect_kernel[detect_idx].run();
+    }
+
+    // Nothing more to do when non-maxima suppression is disabled
+    if(!_non_maxima_suppression)
+    {
+        return;
+    }
+
+    // Non-maxima suppression is run directly on the kernel's full window
+    _non_maxima_kernel->run(_non_maxima_kernel->window());
+}
diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp
new file mode 100644
index 0000000000..b54fb67ab7
--- /dev/null
+++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/Array.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/NEON/functions/NESobel3x3.h"
+#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
+#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <cmath>
+#include <utility>
+
+using namespace arm_compute;
+
+/** Default constructor.
+ *
+ * Value-initialises every member and sets the number of corner candidates to zero.
+ */
+NEHarrisCorners::NEHarrisCorners()
+    : _sobel(),
+      _harris_score(),
+      _non_max_suppr(),
+      _candidates(),
+      _sort_euclidean(),
+      _border_gx(),
+      _border_gy(),
+      _gx(),
+      _gy(),
+      _score(),
+      _nonmax(),
+      _corners_list(),
+      _num_corner_candidates(0)
+{
+}
+
+/** Configure the Harris corners pipeline.
+ *
+ * Sets up the Sobel function, the Harris score kernel, the border fillers for the gradient tensors, the
+ * non-maxima suppression function, the corner candidates kernel and the sort/euclidean distance kernel.
+ * Intermediate tensors are allocated at the end, once every configure call has been made.
+ *
+ * @param[in]  input                 Source image. Checked to be 2D, single channel U8.
+ * @param[in]  threshold             Threshold forwarded to the Harris score kernel.
+ * @param[in]  min_dist              Minimum distance forwarded to the sort/euclidean distance kernel.
+ * @param[in]  sensitivity           Sensitivity forwarded to the Harris score kernel.
+ * @param[in]  gradient_size         Sobel size: 3, 5 or 7. Any other value raises an error.
+ * @param[in]  block_size            Harris score block size: 3, 5 or 7. Any other value raises an error.
+ * @param[out] corners               Key point array handed to the sort/euclidean distance kernel.
+ * @param[in]  border_mode           Border mode used by the Sobel function, the border fillers and the non-maxima suppression.
+ * @param[in]  constant_border_value Constant border value used by the Sobel function and the border fillers.
+ * @param[in]  use_fp16              If true, the NEHarrisScoreFP16Kernel variant is used instead of NEHarrisScoreKernel.
+ */
+void NEHarrisCorners::configure(IImage *input, float threshold, float min_dist,
+                                float sensitivity, int32_t gradient_size, int32_t block_size, KeyPointArray *corners,
+                                BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
+{
+    ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7));
+
+    const TensorShape shape = input->info()->tensor_shape();
+    TensorInfo        tensor_info_gxgy;
+
+    // Gradient tensors use S16 for gradient sizes below 7 and S32 otherwise
+    if(gradient_size < 7)
+    {
+        tensor_info_gxgy.init(shape, Format::S16);
+    }
+    else
+    {
+        tensor_info_gxgy.init(shape, Format::S32);
+    }
+
+    _gx.allocator()->init(tensor_info_gxgy);
+    _gy.allocator()->init(tensor_info_gxgy);
+
+    TensorInfo tensor_info_score(shape, Format::F32);
+    _score.allocator()->init(tensor_info_score);
+    _nonmax.allocator()->init(tensor_info_score);
+
+    // One candidate slot per element of the first two dimensions of the input
+    _corners_list = arm_compute::cpp14::make_unique<InternalKeypoint[]>(shape.x() * shape.y());
+
+    // Set/init Sobel kernel accordingly with gradient_size
+    switch(gradient_size)
+    {
+        case 3:
+        {
+            auto k = arm_compute::cpp14::make_unique<NESobel3x3>();
+            k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+            _sobel = std::move(k);
+            break;
+        }
+        case 5:
+        {
+            auto k = arm_compute::cpp14::make_unique<NESobel5x5>();
+            k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+            _sobel = std::move(k);
+            break;
+        }
+        case 7:
+        {
+            auto k = arm_compute::cpp14::make_unique<NESobel7x7>();
+            k->configure(input, &_gx, &_gy, border_mode, constant_border_value);
+            _sobel = std::move(k);
+            break;
+        }
+        default:
+            ARM_COMPUTE_ERROR("Gradient size not implemented");
+    }
+
+    // Normalization factor. Note that gradient_size / 2 is an integer division (3 -> 1, 5 -> 2, 7 -> 3)
+    const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size);
+
+    if(use_fp16)
+    {
+        // Set/init FP16 Harris Score kernel accordingly with block_size
+        switch(block_size)
+        {
+            case 3:
+            {
+                auto k = arm_compute::cpp14::make_unique<NEHarrisScoreFP16Kernel<3>>();
+                k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+                _harris_score = std::move(k);
+            }
+            break;
+            case 5:
+            {
+                auto k = arm_compute::cpp14::make_unique<NEHarrisScoreFP16Kernel<5>>();
+                k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+                _harris_score = std::move(k);
+            }
+            break;
+            case 7:
+            {
+                auto k = arm_compute::cpp14::make_unique<NEHarrisScoreFP16Kernel<7>>();
+                k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+                _harris_score = std::move(k);
+            }
+            break;
+            default:
+                // Without a kernel the border configuration below would dereference a null pointer
+                ARM_COMPUTE_ERROR("Block size not implemented");
+        }
+    }
+    else
+    {
+        // Set/init Harris Score kernel accordingly with block_size
+        switch(block_size)
+        {
+            case 3:
+            {
+                auto k = arm_compute::cpp14::make_unique<NEHarrisScoreKernel<3>>();
+                k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+                _harris_score = std::move(k);
+            }
+            break;
+            case 5:
+            {
+                auto k = arm_compute::cpp14::make_unique<NEHarrisScoreKernel<5>>();
+                k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+                _harris_score = std::move(k);
+            }
+            break;
+            case 7:
+            {
+                auto k = arm_compute::cpp14::make_unique<NEHarrisScoreKernel<7>>();
+                k->configure(&_gx, &_gy, &_score, norm_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+                _harris_score = std::move(k);
+            }
+            break;
+            default:
+                // Without a kernel the border configuration below would dereference a null pointer
+                ARM_COMPUTE_ERROR("Block size not implemented");
+        }
+    }
+
+    // Configure border filling before harris score
+    _border_gx.configure(&_gx, _harris_score->border_size(), border_mode, constant_border_value);
+    _border_gy.configure(&_gy, _harris_score->border_size(), border_mode, constant_border_value);
+
+    // Init non-maxima suppression function
+    _non_max_suppr.configure(&_score, &_nonmax, border_mode);
+
+    // Init corner candidates kernel
+    _candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates);
+
+    // Init euclidean distance
+    _sort_euclidean.configure(_corners_list.get(), corners, &_num_corner_candidates, min_dist);
+
+    // Allocate once all the configure methods have been called
+    _gx.allocator()->allocate();
+    _gy.allocator()->allocate();
+    _score.allocator()->allocate();
+    _nonmax.allocator()->allocate();
+}
+
+/** Execute the configured Harris corners pipeline once.
+ *
+ * Order of execution: Sobel, border filling of both gradient tensors, Harris score, non-maxima suppression,
+ * corner candidates and finally sort/euclidean distance.
+ */
+void NEHarrisCorners::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
+
+    // The candidate counter is shared with the kernels below, so reset it on every run
+    _num_corner_candidates = 0;
+
+    // Gradients
+    _sobel->run();
+
+    // The border fillers are run directly on their full window before the score is computed
+    _border_gx.run(_border_gx.window());
+    _border_gy.run(_border_gy.window());
+
+    // Harris score, scheduled along the Y dimension
+    auto &scheduler = NEScheduler::get();
+    scheduler.schedule(_harris_score.get(), Window::DimY);
+
+    // Non-maxima suppression on the score
+    _non_max_suppr.run();
+
+    // Corner candidates, scheduled along the Y dimension
+    scheduler.schedule(&_candidates, Window::DimY);
+
+    // Sort & euclidean distance, run directly on the kernel's full window
+    _sort_euclidean.run(_sort_euclidean.window());
+}
diff --git a/src/runtime/NEON/functions/NEHistogram.cpp b/src/runtime/NEON/functions/NEHistogram.cpp
new file mode 100644
index 0000000000..c42b2a56e0
--- /dev/null
+++ b/src/runtime/NEON/functions/NEHistogram.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEHistogram.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IDistribution1D.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor.
+ *
+ * Allocates the window look-up table with window_lut_default_size entries; the local histogram buffer
+ * stays empty (size zero) until configure() is called.
+ */
+NEHistogram::NEHistogram()
+    : _histogram_kernel(),
+      _local_hist(),
+      _window_lut(arm_compute::cpp14::make_unique<uint32_t[]>(window_lut_default_size)),
+      _local_hist_size(0)
+{
+}
+
+/** Allocate the local histogram buffer and configure the histogram kernel.
+ *
+ * @param[in]  input  Source image. Checked to be 2D.
+ * @param[out] output Destination distribution. Must not be nullptr.
+ */
+void NEHistogram::configure(const IImage *input, IDistribution1D *output)
+{
+    ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+
+    // The local buffer holds one set of bins per scheduler thread
+    const auto bins_per_thread = output->num_bins();
+    const auto thread_count    = NEScheduler::get().num_threads();
+    _local_hist_size           = bins_per_thread * thread_count;
+    _local_hist                = arm_compute::cpp14::make_unique<uint32_t[]>(_local_hist_size);
+
+    // The kernel receives raw pointers to the buffers owned by this function
+    _histogram_kernel.configure(input, output, _local_hist.get(), _window_lut.get());
+}
+
+/** Schedule the histogram kernel along the Y dimension. */
+void NEHistogram::run()
+{
+    auto &scheduler = NEScheduler::get();
+
+    // Compute the histogram of the input
+    scheduler.schedule(&_histogram_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEIntegralImage.cpp b/src/runtime/NEON/functions/NEIntegralImage.cpp
new file mode 100644
index 0000000000..af604e9295
--- /dev/null
+++ b/src/runtime/NEON/functions/NEIntegralImage.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create and configure the integral image kernel and the border handler of its output.
+ *
+ * @param[in]  input  Source tensor, forwarded to NEIntegralImageKernel::configure().
+ * @param[out] output Destination tensor, forwarded to NEIntegralImageKernel::configure() and to the border handler.
+ */
+void NEIntegralImage::configure(const ITensor *input, ITensor *output)
+{
+    // Build the kernel locally so that it is fully configured before being stored
+    auto integral_kernel = arm_compute::cpp14::make_unique<NEIntegralImageKernel>();
+    integral_kernel->configure(input, output);
+    _kernel = std::move(integral_kernel);
+
+    // The border of the output, sized as requested by the kernel, is filled with the constant 0
+    _border_handler.configure(output, _kernel->border_size(), BorderMode::CONSTANT, 0);
+}
diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
new file mode 100644
index 0000000000..8232c79f2d
--- /dev/null
+++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
+#include "arm_compute/runtime/Tensor.h"
+
+using namespace arm_compute;
+
+/** Default constructor.
+ *
+ * The number of levels starts at zero, which run() treats as "unconfigured".
+ */
+NELaplacianPyramid::NELaplacianPyramid()
+    : _num_levels(0),
+      _gaussian_pyr_function(),
+      _convf(),
+      _subf(),
+      _gauss_pyr(),
+      _conv_pyr(),
+      _depth_function()
+{
+}
+
+/** Execute the configured Laplacian pyramid pipeline once.
+ *
+ * Runs the Gaussian pyramid function, then every Gaussian 5x5 filter, then every subtraction and finally the
+ * depth conversion. All filters are run before the first subtraction.
+ */
+void NELaplacianPyramid::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(0 == _num_levels, "Unconfigured function");
+
+    // Build the Gaussian pyramid
+    _gaussian_pyr_function.run();
+
+    // First pass: Gaussian filter on each level of the Gaussian pyramid
+    for(unsigned int level = 0; level != _num_levels; ++level)
+    {
+        _convf[level].run();
+    }
+
+    // Second pass: Laplacian image of each level
+    for(unsigned int level = 0; level != _num_levels; ++level)
+    {
+        _subf[level].run();
+    }
+
+    // Depth conversion producing the output tensor
+    _depth_function.run();
+}
+
+/** Configure the Laplacian pyramid pipeline.
+ *
+ * @param[in]  input                 Source tensor. Checked to be single channel U8 with the width and height of @p pyramid.
+ * @param[out] pyramid               Destination pyramid. Must not be nullptr and must have at least one level.
+ *                                   Level i receives the result of the i-th subtraction.
+ * @param[out] output                Destination tensor. Checked to be single channel S16 with the dimensions of
+ *                                   the last level of @p pyramid.
+ * @param[in]  border_mode           Border mode forwarded to the Gaussian pyramid function and the Gaussian filters.
+ * @param[in]  constant_border_value Constant border value forwarded to the same functions.
+ */
+void NELaplacianPyramid::configure(const ITensor *input, IPyramid *pyramid, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16);
+    ARM_COMPUTE_ERROR_ON(0 == pyramid->info()->num_levels());
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->info()->width());
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->info()->height());
+    ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(0));
+    ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(pyramid->info()->num_levels() - 1)->info()->dimension(1));
+
+    _num_levels = pyramid->info()->num_levels();
+
+    // The Gaussian pyramid and the convolved pyramid share one description: U8, scale 0.5, shape of the destination pyramid
+    PyramidInfo internal_pyr_info;
+    internal_pyr_info.init(_num_levels, 0.5f, pyramid->info()->tensor_shape(), arm_compute::Format::U8);
+
+    _gauss_pyr.init(internal_pyr_info);
+    _conv_pyr.init(internal_pyr_info);
+
+    // Gaussian pyramid function
+    _gaussian_pyr_function.configure(input, &_gauss_pyr, border_mode, constant_border_value);
+
+    // One Gaussian filter and one subtraction per level
+    _convf = arm_compute::cpp14::make_unique<NEGaussian5x5[]>(_num_levels);
+    _subf  = arm_compute::cpp14::make_unique<NEArithmeticSubtraction[]>(_num_levels);
+
+    for(unsigned int level = 0; level != _num_levels; ++level)
+    {
+        auto *gauss_level = _gauss_pyr.get_pyramid_level(level);
+        auto *conv_level  = _conv_pyr.get_pyramid_level(level);
+
+        // Filter the Gaussian level, then subtract the filtered level from it
+        _convf[level].configure(gauss_level, conv_level, border_mode, constant_border_value);
+        _subf[level].configure(gauss_level, conv_level, pyramid->get_pyramid_level(level), ConvertPolicy::WRAP);
+    }
+
+    // The output is the depth-converted last level of the convolved pyramid
+    _depth_function.configure(_conv_pyr.get_pyramid_level(_num_levels - 1), output, ConvertPolicy::WRAP, 0);
+
+    // Allocate the internal pyramids only after every configure call above has been made
+    _gauss_pyr.allocate();
+    _conv_pyr.allocate();
+}
diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
new file mode 100644
index 0000000000..36ac4a74d1
--- /dev/null
+++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+
+#include <cstddef>
+
+using namespace arm_compute;
+
+// Default constructor: value-initializes every member. The function arrays stay empty until configure() is called.
+NELaplacianReconstruct::NELaplacianReconstruct()
+    : _tmp_pyr(),
+      _addf(),
+      _scalef(),
+      _depthf()
+{
+}
+
+// Set up the reconstruction of a U8 image from a Laplacian pyramid and a low-resolution S16 input.
+//
+// pyramid                Laplacian pyramid whose levels are added back in; must be non-null and contain at least one level.
+// input                  Single-channel S16 tensor whose X/Y size matches the last pyramid level.
+// output                 Single-channel U8 tensor whose X/Y size matches pyramid level 0; must differ from input.
+// border_mode            Border mode forwarded to every NEScale function.
+// constant_border_value  Border value forwarded to every NEScale function.
+void NELaplacianReconstruct::configure(const IPyramid *pyramid, const ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == pyramid);
+    ARM_COMPUTE_ERROR_ON(input == output);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+
+    const size_t num_levels = pyramid->info()->num_levels();
+
+    // Reject an empty pyramid up front: "num_levels - 1" is unsigned and would wrap around,
+    // both in the checks below and in the size of the scale function array.
+    ARM_COMPUTE_ERROR_ON(0 == num_levels);
+
+    const size_t last_level = num_levels - 1;
+
+    ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
+    ARM_COMPUTE_ERROR_ON(output->info()->num_dimensions() != pyramid->get_pyramid_level(0)->info()->num_dimensions());
+    ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != pyramid->get_pyramid_level(0)->info()->dimension(0));
+    ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) != pyramid->get_pyramid_level(0)->info()->dimension(1));
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != pyramid->get_pyramid_level(last_level)->info()->dimension(0));
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != pyramid->get_pyramid_level(last_level)->info()->dimension(1));
+
+    // Create and initialize the tmp pyramid: I(n-2) = upsample( input + Laplace(n-1) )
+    PyramidInfo pyramid_info;
+    pyramid_info.init(num_levels, 0.5f, output->info()->tensor_shape(), arm_compute::Format::S16);
+
+    _tmp_pyr.init(pyramid_info);
+
+    // Allocate add and scale functions. Level 0 does not need to be scaled.
+    _addf   = arm_compute::cpp14::make_unique<NEArithmeticAddition[]>(num_levels);
+    _scalef = arm_compute::cpp14::make_unique<NEScale[]>(last_level);
+
+    // The last level is seeded directly from the input: tmp(last) = input + Laplace(last)
+    _addf[last_level].configure(input, pyramid->get_pyramid_level(last_level), _tmp_pyr.get_pyramid_level(last_level), ConvertPolicy::SATURATE);
+
+    // Scale levels n-1 to 1, and add levels n-2 to 0
+    for(size_t l = 0; l < last_level; ++l)
+    {
+        _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value);
+        // The addition writes back into the same tmp level it reads from
+        _addf[l].configure(_tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE);
+    }
+
+    // Convert level 0 from S16 to U8
+    _depthf.configure(_tmp_pyr.get_pyramid_level(0), output, ConvertPolicy::SATURATE, 0);
+
+    // Allocate the temporary pyramid once every function has been configured
+    _tmp_pyr.allocate();
+}
+
+// Execute the configured functions: the addition on the last level first, then scale + add for
+// every lower level in descending order, and finally the depth conversion.
+void NELaplacianReconstruct::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_addf == nullptr, "Unconfigured function");
+
+    const size_t top = _tmp_pyr.info()->num_levels() - 1;
+
+    _addf[top].run();
+
+    // Visit levels top - 1 down to 0; the counter stays one above the level being processed
+    for(size_t remaining = top; remaining > 0; --remaining)
+    {
+        const size_t level = remaining - 1;
+
+        _scalef[level].run();
+        _addf[level].run();
+    }
+
+    _depthf.run();
+}
diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
new file mode 100644
index 0000000000..85d7ba3650
--- /dev/null
+++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h"
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include <cmath>
+#include <tuple>
+
+using namespace arm_compute;
+
+// Default constructor: value-initializes the kernels and intermediate tensors.
+// _is_first_run starts as false and is set to true by configure().
+NELocallyConnectedLayer::NELocallyConnectedLayer()
+    : _input_im2col_kernel(),
+      _weights_reshape_kernel(),
+      _mm_kernel(),
+      _output_col2im_kernel(),
+      _input_im2col_reshaped(),
+      _weights_reshaped(),
+      _gemm_output(),
+      _is_first_run(false)
+{
+}
+
+// Validate the tensors, size the three intermediate tensors and configure the four kernels.
+//
+// input      Single-channel F32 tensor.
+// weights    Single-channel F32 tensor; dimension 2 must match input dimension 2 and
+//            dimension 4 must equal the number of convolved output positions.
+// biases     Optional (may be null) single-channel F32 tensor with at most 2 dimensions;
+//            dimension 0 must match weights dimension 3.
+// output     Single-channel F32 tensor whose first two dimensions equal the convolved size.
+// conv_info  Stride, padding and rounding information.
+void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
+    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2));
+
+    const bool has_bias = (biases != nullptr);
+
+    if(has_bias)
+    {
+        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+        ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
+        ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 2);
+    }
+
+    // Arm the one-off weights reshape performed by the next run()
+    _is_first_run = true;
+
+    // Unpack stride and padding from conv_info
+    unsigned int stride_x = 0;
+    unsigned int stride_y = 0;
+    unsigned int pad_x    = 0;
+    unsigned int pad_y    = 0;
+    std::tie(stride_x, stride_y) = conv_info.stride();
+    std::tie(pad_x, pad_y)       = conv_info.pad();
+
+    // Compute the convolved width and height
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0),
+                                                 stride_x, stride_y, pad_x, pad_y, conv_info.round());
+
+    ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one");
+    ARM_COMPUTE_ERROR_ON_MSG(weights->info()->dimension(4) != (conv_w * conv_h), "Weights shape does not match the expected one");
+
+    // Reshaped weights: one matrix per entry of weights dimension 4; a bias adds one extra row
+    const size_t weights_mat_cols  = weights->info()->dimension(3);
+    const size_t weights_mat_rows  = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + (has_bias ? 1 : 0);
+    const size_t weights_mat_count = weights->info()->dimension(4);
+
+    const TensorShape reshaped_weights_shape(weights_mat_cols, weights_mat_rows, weights_mat_count);
+
+    _weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, weights->info()->data_type()));
+
+    // im2col buffer: starts from the input shape and overrides the first three dimensions
+    const size_t im2col_cols  = weights_mat_rows;
+    const size_t im2col_rows  = conv_w * conv_h;
+    TensorShape  im2col_shape = input->info()->tensor_shape();
+    im2col_shape.set(0, im2col_cols);
+    im2col_shape.set(1, im2col_rows);
+    im2col_shape.set(2, 1);
+
+    _input_im2col_reshaped.allocator()->init(TensorInfo(im2col_shape, 1, input->info()->data_type()));
+
+    // Matrix multiply result: starts from the im2col shape and overrides the first two dimensions
+    TensorShape gemm_shape = _input_im2col_reshaped.info()->tensor_shape();
+    gemm_shape.set(0, weights_mat_cols);
+    gemm_shape.set(1, im2col_rows);
+    _gemm_output.allocator()->init(TensorInfo(gemm_shape, 1, input->info()->data_type()));
+
+    // Wire up the kernels
+    _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, has_bias);
+    _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
+    _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
+    _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
+
+    // Allocate the intermediate tensors now that every kernel has been configured
+    _weights_reshaped.allocator()->allocate();
+    _input_im2col_reshaped.allocator()->allocate();
+    _gemm_output.allocator()->allocate();
+}
+
+// Run the layer: reshape the weights on the first run after configure(), then
+// im2col -> matrix multiply -> col2im on every run.
+void NELocallyConnectedLayer::run()
+{
+    // The weights reshape is needed only once per configure()
+    if(_is_first_run)
+    {
+        _is_first_run = false;
+        NEScheduler::get().schedule(&_weights_reshape_kernel, 3);
+    }
+
+    // Reshape the input
+    NEScheduler::get().schedule(&_input_im2col_kernel, Window::DimY);
+
+    // Multiply the reshaped matrices
+    NEScheduler::get().schedule(&_mm_kernel, Window::DimX);
+
+    // Reshape the result into the output tensor
+    NEScheduler::get().schedule(&_output_col2im_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEMagnitude.cpp b/src/runtime/NEON/functions/NEMagnitude.cpp
new file mode 100644
index 0000000000..9390ca2b6a
--- /dev/null
+++ b/src/runtime/NEON/functions/NEMagnitude.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Create and configure the L2-norm magnitude kernel, choosing the FP16 variant when use_fp16 is set.
+// The output is passed as the kernel's third argument; the fourth is left null
+// (presumably the unused phase output - confirm against NEMagnitudePhaseKernel).
+void NEMagnitude::configure(const ITensor *input1, const ITensor *input2, ITensor *output, bool use_fp16)
+{
+    if(!use_fp16)
+    {
+        auto kernel = arm_compute::cpp14::make_unique<NEMagnitudePhaseKernel<MagnitudeType::L2NORM, PhaseType::SIGNED>>();
+        kernel->configure(input1, input2, output, nullptr);
+        _kernel = std::move(kernel);
+        return;
+    }
+
+    auto fp16_kernel = arm_compute::cpp14::make_unique<NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::SIGNED>>();
+    fp16_kernel->configure(input1, input2, output, nullptr);
+    _kernel = std::move(fp16_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp
new file mode 100644
index 0000000000..47143f5e5b
--- /dev/null
+++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h"
+
+#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Default constructor: value-initializes the kernel and zeroes both accumulators.
+NEMeanStdDev::NEMeanStdDev()
+    : _mean_stddev_kernel(),
+      _global_sum(0),
+      _global_sum_squared(0)
+{
+}
+
+// Configure the kernel with the input image, the caller's result pointers and
+// the addresses of the two accumulators owned by this function.
+void NEMeanStdDev::configure(const IImage *input, float *mean, float *stddev)
+{
+    auto *const sum_accumulator         = &_global_sum;
+    auto *const sum_squared_accumulator = &_global_sum_squared;
+
+    _mean_stddev_kernel.configure(input, mean, sum_accumulator, stddev, sum_squared_accumulator);
+}
+
+// Reset both accumulators, then schedule the kernel along the Y dimension.
+void NEMeanStdDev::run()
+{
+    // Clear whatever a previous run left in the accumulators handed to the kernel
+    _global_sum_squared = 0;
+    _global_sum         = 0;
+
+    NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEMedian3x3.cpp b/src/runtime/NEON/functions/NEMedian3x3.cpp
new file mode 100644
index 0000000000..aa7cc97081
--- /dev/null
+++ b/src/runtime/NEON/functions/NEMedian3x3.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Create the median 3x3 kernel and configure the border handler on the input
+// using the kernel's border size, the requested border mode and the constant border value.
+void NEMedian3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode, uint8_t constant_border_value)
+{
+    const bool border_is_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    auto median_kernel = arm_compute::cpp14::make_unique<NEMedian3x3Kernel>();
+    median_kernel->configure(input, output, border_is_undefined);
+    _kernel = std::move(median_kernel);
+
+    // border_size() is queried through _kernel, after ownership has been transferred
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NEMinMaxLocation.cpp b/src/runtime/NEON/functions/NEMinMaxLocation.cpp
new file mode 100644
index 0000000000..cab9200cf8
--- /dev/null
+++ b/src/runtime/NEON/functions/NEMinMaxLocation.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEMinMaxLocation.h"
+
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Default constructor: value-initializes both kernels.
+NEMinMaxLocation::NEMinMaxLocation()
+    : _min_max(),
+      _min_max_loc()
+{
+}
+
+// Configure both kernels on the same input. The min/max kernel receives the min and max pointers;
+// the location kernel receives the same two pointers plus the location arrays and counters.
+void NEMinMaxLocation::configure(const IImage *input, int32_t *min, int32_t *max, ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
+{
+    // First stage: min/max
+    _min_max.configure(input, min, max);
+
+    // Second stage: locations and counts
+    _min_max_loc.configure(input,
+                           min, max,
+                           min_loc, max_loc,
+                           min_count, max_count);
+}
+
+// Reset the min/max kernel, then schedule the min/max kernel followed by the location kernel.
+void NEMinMaxLocation::run()
+{
+    _min_max.reset();
+
+    // Stage 1: min/max kernel
+    NEScheduler::get().schedule(&_min_max, Window::DimY);
+
+    // Stage 2: min/max location kernel
+    NEScheduler::get().schedule(&_min_max_loc, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NENonLinearFilter.cpp b/src/runtime/NEON/functions/NENonLinearFilter.cpp
new file mode 100644
index 0000000000..01aea3b671
--- /dev/null
+++ b/src/runtime/NEON/functions/NENonLinearFilter.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Create the non-linear filter kernel from the given function, mask size, pattern and mask, and
+// configure the border handler on the input using the kernel's border size.
+void NENonLinearFilter::configure(ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                                  BorderMode border_mode,
+                                  uint8_t    constant_border_value)
+{
+    const bool border_is_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    auto filter_kernel = arm_compute::cpp14::make_unique<NENonLinearFilterKernel>();
+    filter_kernel->configure(input, output, function, mask_size, pattern, mask, border_is_undefined);
+    _kernel = std::move(filter_kernel);
+
+    // border_size() is queried through _kernel, after ownership has been transferred
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
new file mode 100644
index 0000000000..a7b3759a45
--- /dev/null
+++ b/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Create the non-maxima suppression kernel and configure a 1-pixel border handler on the input.
+// Any border mode other than UNDEFINED is handled as a CONSTANT border with value 0.
+void NENonMaximaSuppression3x3::configure(ITensor *input, ITensor *output, BorderMode border_mode)
+{
+    const bool border_is_undefined = (border_mode == BorderMode::UNDEFINED);
+
+    auto nms_kernel = arm_compute::cpp14::make_unique<NENonMaximaSuppression3x3Kernel>();
+    nms_kernel->configure(input, output, border_is_undefined);
+    _kernel = std::move(nms_kernel);
+
+    // Only two modes ever reach the border handler: UNDEFINED stays as is, everything else becomes CONSTANT
+    const BorderMode handler_mode = border_is_undefined ? BorderMode::UNDEFINED : BorderMode::CONSTANT;
+
+    _border_handler.configure(input, 1, handler_mode, 0);
+}
diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp
new file mode 100644
index 0000000000..69ff32591f
--- /dev/null
+++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+// Default constructor: value-initializes the kernels, the border handler and the intermediate tensor.
+NENormalizationLayer::NENormalizationLayer()
+    : _norm_kernel(),
+      _multiply_kernel(),
+      _border_handler(),
+      _input_squared()
+{
+}
+
+// Configure the normalization layer: size the intermediate tensor like the input, then set up the
+// normalization kernel, the input * input multiplication that fills the intermediate tensor, and a
+// constant-zero border handler on that intermediate tensor.
+void NENormalizationLayer::configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info)
+{
+    ARM_COMPUTE_ERROR_ON(input == nullptr);
+
+    // The intermediate tensor copies the input's shape, data type and fixed point position
+    TensorInfo squared_info(input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _input_squared.allocator()->init(squared_info);
+
+    // Set up the kernels
+    _norm_kernel.configure(input, &_input_squared, output, norm_info);
+    _multiply_kernel.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _border_handler.configure(&_input_squared, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0.0f));
+
+    // Allocation is deferred until after all configure calls
+    _input_squared.allocator()->allocate();
+}
+
+// Schedule the three stages in order: multiplication, border fill, normalization.
+void NENormalizationLayer::run()
+{
+    // Fill the intermediate tensor with input * input
+    NEScheduler::get().schedule(&_multiply_kernel, Window::DimY);
+
+    // Fill the border of the intermediate tensor
+    NEScheduler::get().schedule(&_border_handler, Window::DimY);
+
+    // Normalize
+    NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NEOpticalFlow.cpp b/src/runtime/NEON/functions/NEOpticalFlow.cpp
new file mode 100644
index 0000000000..49135e442c
--- /dev/null
+++ b/src/runtime/NEON/functions/NEOpticalFlow.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEOpticalFlow.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
+#include "arm_compute/runtime/Pyramid.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+using namespace arm_compute;
+
+// Default constructor: empty function/kernel/tensor arrays, null key point pointers and zero levels.
+// run() treats _num_levels == 0 as "unconfigured".
+NEOpticalFlow::NEOpticalFlow()
+    : _func_scharr(),
+      _kernel_tracker(),
+      _scharr_gx(),
+      _scharr_gy(),
+      _new_points(nullptr),
+      _new_points_estimates(nullptr),
+      _old_points(nullptr),
+      _new_points_internal(),
+      _old_points_internal(),
+      _num_levels(0)
+{
+}
+
+// Configure one Scharr function and one Lucas-Kanade tracker kernel per pyramid level.
+//
+// old_pyramid / new_pyramid  Non-null pyramids with the same, non-zero number of levels and the same
+//                            width and height; corresponding levels must have the same X/Y size.
+// old_points                 Non-null key points to track; their count sizes the internal arrays.
+// new_points_estimates       Non-null estimates; must hold as many values as old_points when
+//                            use_initial_estimate is true.
+// new_points                 Non-null output array; resized here to old_points->num_values().
+// termination, epsilon, num_iterations, window_dimension, use_initial_estimate
+//                            Forwarded unchanged to every tracker kernel.
+// border_mode, constant_border_value
+//                            Forwarded unchanged to every Scharr function.
+void NEOpticalFlow::configure(const Pyramid *old_pyramid, const Pyramid *new_pyramid, const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates,
+                              IKeyPointArray *new_points, Termination termination, float epsilon, unsigned int num_iterations, size_t window_dimension,
+                              bool use_initial_estimate, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == old_pyramid);
+    ARM_COMPUTE_ERROR_ON(nullptr == new_pyramid);
+    ARM_COMPUTE_ERROR_ON(nullptr == old_points);
+    ARM_COMPUTE_ERROR_ON(nullptr == new_points_estimates);
+    ARM_COMPUTE_ERROR_ON(nullptr == new_points);
+    ARM_COMPUTE_ERROR_ON(old_pyramid->info()->num_levels() != new_pyramid->info()->num_levels());
+    ARM_COMPUTE_ERROR_ON(0 == old_pyramid->info()->num_levels());
+    ARM_COMPUTE_ERROR_ON(old_pyramid->info()->width() != new_pyramid->info()->width());
+    ARM_COMPUTE_ERROR_ON(old_pyramid->info()->height() != new_pyramid->info()->height());
+    ARM_COMPUTE_ERROR_ON(use_initial_estimate && old_points->num_values() != new_points_estimates->num_values());
+
+    _num_levels           = old_pyramid->info()->num_levels();
+    _old_points           = old_points;
+    _new_points           = new_points;
+    _new_points_estimates = new_points_estimates;
+
+    // Only the old pyramid's scale is read; it is passed to every tracker kernel
+    const float pyr_scale = old_pyramid->info()->scale();
+
+    _func_scharr    = arm_compute::cpp14::make_unique<NEScharr3x3[]>(_num_levels);
+    _kernel_tracker = arm_compute::cpp14::make_unique<NELKTrackerKernel[]>(_num_levels);
+    _scharr_gx      = arm_compute::cpp14::make_unique<Tensor[]>(_num_levels);
+    _scharr_gy      = arm_compute::cpp14::make_unique<Tensor[]>(_num_levels);
+
+    _old_points_internal = LKInternalKeypointArray(old_points->num_values());
+    _new_points_internal = LKInternalKeypointArray(old_points->num_values());
+    _new_points->resize(old_points->num_values());
+
+    for(unsigned int i = 0; i < _num_levels; ++i)
+    {
+        // Get images from the ith level of old and new pyramid
+        IImage *old_ith_input = old_pyramid->get_pyramid_level(i);
+        IImage *new_ith_input = new_pyramid->get_pyramid_level(i);
+
+        // Only the base size and level count of the two pyramids are compared above, so make sure
+        // the corresponding levels really have the same size
+        ARM_COMPUTE_ERROR_ON(old_ith_input->info()->dimension(0) != new_ith_input->info()->dimension(0));
+        ARM_COMPUTE_ERROR_ON(old_ith_input->info()->dimension(1) != new_ith_input->info()->dimension(1));
+
+        // The gradient tensors are outputs of Scharr on the old image, so both dimensions come from it
+        const unsigned int width_ith  = old_ith_input->info()->dimension(0);
+        const unsigned int height_ith = old_ith_input->info()->dimension(1);
+
+        TensorInfo tensor_info(TensorShape(width_ith, height_ith), Format::S16);
+
+        _scharr_gx[i].allocator()->init(tensor_info);
+        _scharr_gy[i].allocator()->init(tensor_info);
+
+        // Init Scharr kernel
+        _func_scharr[i].configure(old_ith_input, _scharr_gx.get() + i, _scharr_gy.get() + i, border_mode, constant_border_value);
+
+        // Init Lucas-Kanade kernel
+        _kernel_tracker[i].configure(old_ith_input, new_ith_input, _scharr_gx.get() + i, _scharr_gy.get() + i,
+                                     old_points, new_points_estimates, new_points,
+                                     &_old_points_internal, &_new_points_internal,
+                                     termination, use_initial_estimate, epsilon, num_iterations, window_dimension,
+                                     i, _num_levels, pyr_scale);
+
+        // Allocate the gradient tensors once both consumers have been configured
+        _scharr_gx[i].allocator()->allocate();
+        _scharr_gy[i].allocator()->allocate();
+    }
+}
+
+// Run Scharr followed by the tracker kernel for every level, from index _num_levels - 1 down to 0.
+void NEOpticalFlow::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_num_levels == 0, "Unconfigured function");
+
+    unsigned int level = _num_levels;
+
+    // The post-decrement turns the one-past-the-end counter into the index used in the body
+    while(level-- > 0)
+    {
+        // Gradients for this level
+        _func_scharr[level].run();
+
+        // Lucas-Kanade tracking for this level
+        NEScheduler::get().schedule(_kernel_tracker.get() + level, Window::DimX);
+    }
+}
diff --git a/src/runtime/NEON/functions/NEPhase.cpp b/src/runtime/NEON/functions/NEPhase.cpp
new file mode 100644
index 0000000000..7683f461d3
--- /dev/null
+++ b/src/runtime/NEON/functions/NEPhase.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEPhase.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Set up the function by creating a NEMagnitudePhaseKernel instantiated with
+// MagnitudeType::L2NORM and PhaseType::SIGNED and storing it in _kernel.
+void NEPhase::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
+{
+    auto phase_kernel = arm_compute::cpp14::make_unique<NEMagnitudePhaseKernel<MagnitudeType::L2NORM, PhaseType::SIGNED>>();
+
+    // The third kernel argument is deliberately passed as nullptr.
+    // NOTE(review): presumably that slot is the magnitude output - confirm against the kernel API.
+    phase_kernel->configure(input1, input2, nullptr, output);
+
+    // Transfer ownership of the configured kernel to _kernel
+    _kernel = std::move(phase_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp
new file mode 100644
index 0000000000..056d33b370
--- /dev/null
+++ b/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Set up the function by creating a NEPixelWiseMultiplicationKernel, forwarding
+// all arguments unchanged to its configure() and storing it in _kernel.
+void NEPixelWiseMultiplication::configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy)
+{
+    auto mul_kernel = arm_compute::cpp14::make_unique<NEPixelWiseMultiplicationKernel>();
+
+    mul_kernel->configure(input1, input2, output, scale, overflow_policy, rounding_policy);
+
+    // Transfer ownership of the configured kernel to _kernel
+    _kernel = std::move(mul_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
new file mode 100644
index 0000000000..6f0cc4f160
--- /dev/null
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h"
+
+using namespace arm_compute;
+
+// Set up the pooling kernel and the border handler that goes with it.
+void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+{
+    // Create the pooling kernel, configure it and hand it over to _kernel
+    auto pooling_kernel = arm_compute::cpp14::make_unique<NEPoolingLayerKernel>();
+    pooling_kernel->configure(input, output, pool_info);
+    _kernel = std::move(pooling_kernel);
+
+    // MAX pooling replicates the border; every other pooling type uses a constant border.
+    // PixelValue(0) is passed as the border value in both cases.
+    BorderMode border_mode = BorderMode::CONSTANT;
+    if(pool_info.pool_type() == PoolingType::MAX)
+    {
+        border_mode = BorderMode::REPLICATE;
+    }
+
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0));
+}
diff --git a/src/runtime/NEON/functions/NERemap.cpp b/src/runtime/NEON/functions/NERemap.cpp
new file mode 100644
index 0000000000..9f06fb699c
--- /dev/null
+++ b/src/runtime/NEON/functions/NERemap.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NERemap.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NERemapKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Validate the arguments, then set up the remap kernel and its border handler.
+// input/output are checked as single-channel U8, map_x/map_y as single-channel F32;
+// InterpolationPolicy::AREA is rejected.
+void NERemap::configure(ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // Image tensors
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+    // Coordinate maps
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported");
+
+    // Create and configure the remap kernel, then hand it over to _kernel
+    auto remap_kernel = arm_compute::cpp14::make_unique<NERemapKernel>();
+    remap_kernel->configure(input, map_x, map_y, output, policy);
+    _kernel = std::move(remap_kernel);
+
+    // The border handler fills the border reported by the kernel that was just stored
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
new file mode 100644
index 0000000000..b70f626df0
--- /dev/null
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEScale.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include <cmath>
+#include <cstddef>
+#include <utility>
+
+using namespace arm_compute;
+
+namespace
+{
+/** Fill the per-output-pixel lookup tensors used by the scale kernel.
+ *
+ * The function iterates over the first two dimensions of @p offsets. When both
+ * @p dx and @p dy are provided (bilinear interpolation) it stores, for every
+ * element, the byte offset of the floor'ed source column and the fractional
+ * distances to the floor'ed source column/row. Otherwise (nearest neighbour)
+ * only the byte offset of the source column is stored.
+ *
+ * @param[out] dx                 Tensor written as float with the horizontal fractional distance. Can be nullptr.
+ * @param[out] dy                 Tensor written as float with the vertical fractional distance. Can be nullptr.
+ * @param[out] offsets            Tensor written as int32_t with the source column multiplied by @p input_element_size. Must not be nullptr.
+ * @param[in]  wr                 Ratio applied to the output x coordinate (the caller passes input width / output width).
+ * @param[in]  hr                 Ratio applied to the output y coordinate (the caller passes input height / output height).
+ * @param[in]  input_element_size Size in bytes of one input element, used to turn a column index into an offset.
+ */
+void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == offsets);
+
+    // Signed copy of the element size: the bilinear source column can be negative (-1 when up-scaling),
+    // so the offset must be computed with signed arithmetic instead of relying on size_t wraparound
+    // followed by a narrowing conversion.
+    const auto element_size = static_cast<int32_t>(input_element_size);
+
+    Window win;
+    win.set(Window::DimX, Window::Dimension(0, offsets->info()->dimension(0), 1));
+    win.set(Window::DimY, Window::Dimension(0, offsets->info()->dimension(1), 1));
+
+    if(dx != nullptr && dy != nullptr)
+    {
+        // Pre-compute the offset and pixel's distance for BILINEAR interpolation
+        Iterator offsets_it(offsets, win);
+        Iterator dx_it(dx, win);
+        Iterator dy_it(dy, win);
+
+        execute_window_loop(win, [&](const Coordinates & id)
+        {
+            // Map the centre of the output pixel back into input coordinates
+            const float in_x  = (id.x() + 0.5f) * wr - 0.5f;
+            const float in_y  = (id.y() + 0.5f) * hr - 0.5f;
+            const auto  in_xi = static_cast<int32_t>(std::floor(in_x));
+            const auto  in_yi = static_cast<int32_t>(std::floor(in_y));
+
+            *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * element_size;
+            *reinterpret_cast<float *>(dx_it.ptr())        = in_x - in_xi;
+            *reinterpret_cast<float *>(dy_it.ptr())        = in_y - in_yi;
+        },
+        offsets_it, dx_it, dy_it);
+    }
+    else
+    {
+        // Pre-compute the offset for NEAREST interpolation
+        Iterator offsets_it(offsets, win);
+
+        execute_window_loop(win, [&](const Coordinates & id)
+        {
+            // Truncation towards zero selects the source column (the value is never negative here)
+            const auto in_xi = static_cast<int32_t>((id.x() + 0.5f) * wr);
+
+            *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * element_size;
+        },
+        offsets_it);
+    }
+}
+} // namespace
+
+// Default-construct the internal tensors; configure() initialises and allocates
+// the ones required by the selected interpolation policy.
+NEScale::NEScale()
+    : _offsets(),
+      _dx(),
+      _dy()
+{
+}
+
+// Set up the scale kernel, the lookup tensors it needs for the chosen
+// interpolation policy, and the border handler.
+void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+
+    // Every dimension above the first two has to match between input and output
+    for(size_t dim = 2; dim < Coordinates::num_max_dimensions; ++dim)
+    {
+        ARM_COMPUTE_ERROR_ON(input->info()->dimension(dim) != output->info()->dimension(dim));
+    }
+
+    // 2D shape (output width x output height) shared by all lookup tensors
+    const TensorShape lut_shape(output->info()->dimension(0), output->info()->dimension(1));
+
+    // Source / destination size ratios along x and y
+    const auto ratio_w = static_cast<float>(input->info()->dimension(0)) / static_cast<float>(output->info()->dimension(0));
+    const auto ratio_h = static_cast<float>(input->info()->dimension(1)) / static_cast<float>(output->info()->dimension(1));
+
+    // Size in bytes of one input element
+    const size_t element_size = input->info()->element_size();
+
+    // When both ratios are <= 1, AREA interpolation is replaced by NEAREST_NEIGHBOR
+    const bool both_ratios_at_most_one = (ratio_w <= 1.f) && (ratio_h <= 1.f);
+    if(policy == InterpolationPolicy::AREA && both_ratios_at_most_one)
+    {
+        policy = InterpolationPolicy::NEAREST_NEIGHBOR;
+    }
+
+    auto scale_kernel = arm_compute::cpp14::make_unique<NEScaleKernel>();
+
+    // The kernel is told whether the border mode is UNDEFINED
+    const bool undefined_border = (border_mode == BorderMode::UNDEFINED);
+
+    if(policy == InterpolationPolicy::NEAREST_NEIGHBOR)
+    {
+        // Only the offsets tensor is needed
+        TensorInfo offsets_info(lut_shape, Format::S32);
+        _offsets.allocator()->init(offsets_info);
+
+        scale_kernel->configure(input, nullptr, nullptr, &_offsets, output, policy, undefined_border);
+
+        // Allocation happens after the kernel has been configured
+        _offsets.allocator()->allocate();
+
+        // Fill the offsets for nearest neighbour interpolation
+        precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, ratio_w, ratio_h, element_size);
+    }
+    else if(policy == InterpolationPolicy::BILINEAR)
+    {
+        // Offsets plus the two distance tensors are needed
+        TensorInfo offsets_info(lut_shape, Format::S32);
+        TensorInfo distance_info(lut_shape, Format::F32);
+
+        _offsets.allocator()->init(offsets_info);
+        _dx.allocator()->init(distance_info);
+        _dy.allocator()->init(distance_info);
+
+        scale_kernel->configure(input, &_dx, &_dy, &_offsets, output, policy, undefined_border);
+
+        // Allocation happens after the kernel has been configured
+        _offsets.allocator()->allocate();
+        _dx.allocator()->allocate();
+        _dy.allocator()->allocate();
+
+        // Fill dx, dy and offsets for bilinear interpolation
+        precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, ratio_w, ratio_h, element_size);
+    }
+    else if(policy == InterpolationPolicy::AREA)
+    {
+        // No lookup tensors are passed for area interpolation
+        scale_kernel->configure(input, nullptr, nullptr, nullptr, output, policy, undefined_border);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Unsupported interpolation mode");
+    }
+
+    _kernel = std::move(scale_kernel);
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NEScharr3x3.cpp b/src/runtime/NEON/functions/NEScharr3x3.cpp
new file mode 100644
index 0000000000..04b3f14ce7
--- /dev/null
+++ b/src/runtime/NEON/functions/NEScharr3x3.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Set up the Scharr 3x3 kernel and the border handler that goes with it.
+void NEScharr3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // The kernel only needs to know whether the border mode is UNDEFINED
+    const bool undefined_border = (border_mode == BorderMode::UNDEFINED);
+
+    auto scharr_kernel = arm_compute::cpp14::make_unique<NEScharr3x3Kernel>();
+    scharr_kernel->configure(input, output_x, output_y, undefined_border);
+    _kernel = std::move(scharr_kernel);
+
+    // The border handler fills the border reported by the kernel that was just stored
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NESobel3x3.cpp b/src/runtime/NEON/functions/NESobel3x3.cpp
new file mode 100644
index 0000000000..3b46fd78c1
--- /dev/null
+++ b/src/runtime/NEON/functions/NESobel3x3.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NESobel3x3.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h"
+#include "arm_compute/core/PixelValue.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Set up the Sobel 3x3 kernel and the border handler that goes with it.
+void NESobel3x3::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // The kernel only needs to know whether the border mode is UNDEFINED
+    const bool undefined_border = (border_mode == BorderMode::UNDEFINED);
+
+    auto sobel_kernel = arm_compute::cpp14::make_unique<NESobel3x3Kernel>();
+    sobel_kernel->configure(input, output_x, output_y, undefined_border);
+    _kernel = std::move(sobel_kernel);
+
+    // The border handler fills the border reported by the kernel that was just stored
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+}
diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp
new file mode 100644
index 0000000000..8967a22ba1
--- /dev/null
+++ b/src/runtime/NEON/functions/NESobel5x5.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NESobel5x5.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+using namespace arm_compute;
+
+// Default-construct both Sobel kernels, the two intermediate tensors and the
+// border handler; everything is set up later in configure().
+NESobel5x5::NESobel5x5()
+    : _sobel_hor(),
+      _sobel_vert(),
+      _tmp_x(),
+      _tmp_y(),
+      _border_handler()
+{
+}
+
+/** Set up the horizontal and vertical Sobel 5x5 kernels and the border handler.
+ *
+ * The horizontal kernel writes into internal S16 intermediate tensors that have the
+ * input's shape; the vertical kernel consumes them and writes the requested outputs.
+ * Only the intermediates needed for the requested outputs are initialised and allocated.
+ *
+ * @param[in,out] input                 Source tensor. Checked to be single-channel U8. (The border handler is configured on it.)
+ * @param[out]    output_x              Destination for the X gradient. Can be nullptr if @p output_y is provided.
+ * @param[out]    output_y              Destination for the Y gradient. Can be nullptr if @p output_x is provided.
+ * @param[in]     border_mode           Border mode used by the border handler; the kernels are told whether it is UNDEFINED.
+ * @param[in]     constant_border_value Value wrapped in a PixelValue and passed to the border handler.
+ */
+void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+
+    const bool run_sobel_x = output_x != nullptr;
+    const bool run_sobel_y = output_y != nullptr;
+
+    // Without any output none of the branches below would configure the kernels, yet the border
+    // handler would still be set up from the unconfigured horizontal kernel's border size.
+    ARM_COMPUTE_ERROR_ON_MSG(!run_sobel_x && !run_sobel_y, "At least one of output_x and output_y must be provided");
+
+    // Both kernels only need to know whether the border mode is UNDEFINED
+    const bool border_undefined = border_mode == BorderMode::UNDEFINED;
+
+    TensorInfo tensor_info(input->info()->tensor_shape(), Format::S16);
+
+    if(run_sobel_x && run_sobel_y)
+    {
+        _tmp_x.allocator()->init(tensor_info);
+        _tmp_y.allocator()->init(tensor_info);
+        _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_undefined);
+        _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_undefined);
+        // Allocate once the configure methods have been called
+        _tmp_x.allocator()->allocate();
+        _tmp_y.allocator()->allocate();
+    }
+    else if(run_sobel_x)
+    {
+        _tmp_x.allocator()->init(tensor_info);
+        _sobel_hor.configure(input, &_tmp_x, nullptr, border_undefined);
+        _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_undefined);
+        _tmp_x.allocator()->allocate();
+    }
+    else if(run_sobel_y)
+    {
+        _tmp_y.allocator()->init(tensor_info);
+        _sobel_hor.configure(input, nullptr, &_tmp_y, border_undefined);
+        _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_undefined);
+        _tmp_y.allocator()->allocate();
+    }
+
+    _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+}
+
+// Fill the input border, then schedule the horizontal pass followed by the
+// vertical pass, both split along Window::DimY.
+void NESobel5x5::run()
+{
+    // The border handler is run directly over its own window
+    _border_handler.run(_border_handler.window());
+
+    auto &scheduler = NEScheduler::get();
+    scheduler.schedule(&_sobel_hor, Window::DimY);
+    scheduler.schedule(&_sobel_vert, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp
new file mode 100644
index 0000000000..f628da9709
--- /dev/null
+++ b/src/runtime/NEON/functions/NESobel7x7.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NESobel7x7.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+using namespace arm_compute;
+
+// Default-construct both Sobel kernels, the two intermediate tensors and the
+// border handler; everything is set up later in configure().
+NESobel7x7::NESobel7x7()
+    : _sobel_hor(),
+      _sobel_vert(),
+      _tmp_x(),
+      _tmp_y(),
+      _border_handler()
+{
+}
+
+/** Set up the horizontal and vertical Sobel 7x7 kernels and the border handler.
+ *
+ * The horizontal kernel writes into internal S32 intermediate tensors that have the
+ * input's shape; the vertical kernel consumes them and writes the requested outputs.
+ * Only the intermediates needed for the requested outputs are initialised and allocated.
+ *
+ * @param[in,out] input                 Source tensor. Checked to be single-channel U8. (The border handler is configured on it.)
+ * @param[out]    output_x              Destination for the X gradient. Can be nullptr if @p output_y is provided.
+ * @param[out]    output_y              Destination for the Y gradient. Can be nullptr if @p output_x is provided.
+ * @param[in]     border_mode           Border mode used by the border handler; the kernels are told whether it is UNDEFINED.
+ * @param[in]     constant_border_value Value wrapped in a PixelValue and passed to the border handler.
+ */
+void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+
+    const bool run_sobel_x = output_x != nullptr;
+    const bool run_sobel_y = output_y != nullptr;
+
+    // Without any output none of the branches below would configure the kernels, yet the border
+    // handler would still be set up from the unconfigured horizontal kernel's border size.
+    ARM_COMPUTE_ERROR_ON_MSG(!run_sobel_x && !run_sobel_y, "At least one of output_x and output_y must be provided");
+
+    // Both kernels only need to know whether the border mode is UNDEFINED
+    const bool border_undefined = border_mode == BorderMode::UNDEFINED;
+
+    TensorInfo tensor_info(input->info()->tensor_shape(), Format::S32);
+
+    if(run_sobel_x && run_sobel_y)
+    {
+        _tmp_x.allocator()->init(tensor_info);
+        _tmp_y.allocator()->init(tensor_info);
+        _sobel_hor.configure(input, &_tmp_x, &_tmp_y, border_undefined);
+        _sobel_vert.configure(&_tmp_x, &_tmp_y, output_x, output_y, border_undefined);
+        // Allocate once the configure methods have been called
+        _tmp_x.allocator()->allocate();
+        _tmp_y.allocator()->allocate();
+    }
+    else if(run_sobel_x)
+    {
+        _tmp_x.allocator()->init(tensor_info);
+        _sobel_hor.configure(input, &_tmp_x, nullptr, border_undefined);
+        _sobel_vert.configure(&_tmp_x, nullptr, output_x, nullptr, border_undefined);
+        _tmp_x.allocator()->allocate();
+    }
+    else if(run_sobel_y)
+    {
+        _tmp_y.allocator()->init(tensor_info);
+        _sobel_hor.configure(input, nullptr, &_tmp_y, border_undefined);
+        _sobel_vert.configure(nullptr, &_tmp_y, nullptr, output_y, border_undefined);
+        _tmp_y.allocator()->allocate();
+    }
+
+    _border_handler.configure(input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+}
+
+// Fill the input border, then schedule the horizontal pass followed by the
+// vertical pass, both split along Window::DimY.
+void NESobel7x7::run()
+{
+    // The border handler is run directly over its own window
+    _border_handler.run(_border_handler.window());
+
+    auto &scheduler = NEScheduler::get();
+    scheduler.schedule(&_sobel_hor, Window::DimY);
+    scheduler.schedule(&_sobel_vert, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
new file mode 100644
index 0000000000..0651eab1bc
--- /dev/null
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include <cfloat>
+
+using namespace arm_compute;
+
+// Default-construct the four kernels and the three intermediate tensors;
+// everything is set up later in configure().
+NESoftmaxLayer::NESoftmaxLayer()
+    : _max_kernel(),
+      _shift_exp_sum_kernel(),
+      _norm_kernel(),
+      _fill_border_kernel(),
+      _max(),
+      _sum(),
+      _tmp()
+{
+}
+
+// Set up the intermediate tensors and the four kernels (border fill, max,
+// shift-exp-sum, normalisation) used by run().
+void NESoftmaxLayer::configure(ITensor *input, ITensor *output)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+
+    // _tmp mirrors the input: same shape, channels, data type and fixed point position
+    TensorInfo full_info(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position());
+    _tmp.allocator()->init(full_info);
+
+    // _max and _sum use the input's shape with dimension 0 collapsed to 1
+    TensorShape reduced_shape = input->info()->tensor_shape();
+    reduced_shape.set(0, 1);
+    TensorInfo reduced_info(reduced_shape, input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position());
+    _max.allocator()->init(reduced_info);
+    _sum.allocator()->init(reduced_info);
+
+    // Wire the kernels together through the intermediate tensors
+    _max_kernel.configure(input, &_max);
+    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+    _norm_kernel.configure(&_tmp, &_sum, output);
+    // NOTE(review): the border constant is built from a float (-FLT_MAX) although QS8 inputs are
+    // accepted above - confirm the fill-border kernel produces the intended value for QS8.
+    _fill_border_kernel.configure(input, _max_kernel.border_size(), BorderMode::CONSTANT, PixelValue(-FLT_MAX));
+
+    // Intermediate tensors are allocated only after every kernel has been configured
+    _tmp.allocator()->allocate();
+    _max.allocator()->allocate();
+    _sum.allocator()->allocate();
+}
+
+// Schedule the four kernels in order - border fill, max, shift-exp-sum and
+// normalisation - each split along Window::DimY.
+void NESoftmaxLayer::run()
+{
+    auto &scheduler = NEScheduler::get();
+
+    scheduler.schedule(&_fill_border_kernel, Window::DimY);
+    scheduler.schedule(&_max_kernel, Window::DimY);
+    scheduler.schedule(&_shift_exp_sum_kernel, Window::DimY);
+    scheduler.schedule(&_norm_kernel, Window::DimY);
+}
diff --git a/src/runtime/NEON/functions/NETableLookup.cpp b/src/runtime/NEON/functions/NETableLookup.cpp
new file mode 100644
index 0000000000..ebb8a0ac9b
--- /dev/null
+++ b/src/runtime/NEON/functions/NETableLookup.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NETableLookup.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+// Set up the function by creating a NETableLookupKernel, forwarding all
+// arguments unchanged to its configure() and storing it in _kernel.
+void NETableLookup::configure(const ITensor *input, const ILut *lut, ITensor *output)
+{
+    auto lookup_kernel = arm_compute::cpp14::make_unique<NETableLookupKernel>();
+
+    lookup_kernel->configure(input, lut, output);
+
+    // Transfer ownership of the configured kernel to _kernel
+    _kernel = std::move(lookup_kernel);
+}
diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp
new file mode 100644
index 0000000000..93dc124880
--- /dev/null
+++ b/src/runtime/NEON/functions/NEThreshold.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEThreshold.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEThreshold::configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
+{ // Creates an NEThresholdKernel, configures it with the given arguments and stores it in _kernel.
+ auto threshold_kernel = arm_compute::cpp14::make_unique<NEThresholdKernel>(); // create the kernel
+ threshold_kernel->configure(input, output, threshold, false_value, true_value, type, upper); // arguments are forwarded unchanged
+ _kernel = std::move(threshold_kernel); // move the configured kernel into _kernel
+}
diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp
new file mode 100644
index 0000000000..53ac9c5ee3
--- /dev/null
+++ b/src/runtime/NEON/functions/NETranspose.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NETranspose::configure(const ITensor *input, ITensor *output)
+{ // Creates an NETransposeKernel, configures it with the given arguments and stores it in _kernel.
+ auto transpose_kernel = arm_compute::cpp14::make_unique<NETransposeKernel>(); // create the kernel
+ transpose_kernel->configure(input, output); // arguments are forwarded unchanged
+ _kernel = std::move(transpose_kernel); // move the configured kernel into _kernel
+}
diff --git a/src/runtime/NEON/functions/NEWarpAffine.cpp b/src/runtime/NEON/functions/NEWarpAffine.cpp
new file mode 100644
index 0000000000..24fb16f9e3
--- /dev/null
+++ b/src/runtime/NEON/functions/NEWarpAffine.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEWarpAffine.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEWarpAffine::configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+ // Argument checks: input and output against (1 channel, DataType::U8), then a null matrix pointer.
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON(nullptr == matrix);
+
+ // Pick the NEWarpAffineKernel specialisation that matches the requested interpolation policy.
+ if(policy == InterpolationPolicy::NEAREST_NEIGHBOR)
+ {
+ auto nearest_kernel = arm_compute::cpp14::make_unique<NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>>();
+ nearest_kernel->configure(input, output, matrix, border_mode, constant_border_value);
+ _kernel = std::move(nearest_kernel);
+ }
+ else if(policy == InterpolationPolicy::BILINEAR)
+ {
+ auto bilinear_kernel = arm_compute::cpp14::make_unique<NEWarpAffineKernel<InterpolationPolicy::BILINEAR>>();
+ bilinear_kernel->configure(input, output, matrix, border_mode, constant_border_value);
+ _kernel = std::move(bilinear_kernel);
+ }
+ else
+ {
+ // InterpolationPolicy::AREA and every other value end up here.
+ ARM_COMPUTE_ERROR("Interpolation type not supported");
+ }
+
+ // The border handler is configured on the input with the border size reported by the chosen kernel.
+ _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+}
diff --git a/src/runtime/NEON/functions/NEWarpPerspective.cpp b/src/runtime/NEON/functions/NEWarpPerspective.cpp
new file mode 100644
index 0000000000..84b2df5bfa
--- /dev/null
+++ b/src/runtime/NEON/functions/NEWarpPerspective.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEWarpPerspective.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/kernels/NEWarpKernel.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void NEWarpPerspective::configure(ITensor *input, ITensor *output, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value)
+{
+ // Argument checks: input and output against (1 channel, DataType::U8), then a null matrix pointer.
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON(nullptr == matrix);
+
+ // Pick the NEWarpPerspectiveKernel specialisation that matches the requested interpolation policy.
+ if(policy == InterpolationPolicy::NEAREST_NEIGHBOR)
+ {
+ auto nearest_kernel = arm_compute::cpp14::make_unique<NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>>();
+ nearest_kernel->configure(input, output, matrix, border_mode, constant_border_value);
+ _kernel = std::move(nearest_kernel);
+ }
+ else if(policy == InterpolationPolicy::BILINEAR)
+ {
+ auto bilinear_kernel = arm_compute::cpp14::make_unique<NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>>();
+ bilinear_kernel->configure(input, output, matrix, border_mode, constant_border_value);
+ _kernel = std::move(bilinear_kernel);
+ }
+ else
+ {
+ // InterpolationPolicy::AREA and every other value end up here.
+ ARM_COMPUTE_ERROR("Interpolation type not supported");
+ }
+
+ // The border handler is configured on the input with the border size reported by the chosen kernel.
+ _border_handler.configure(input, _kernel->border_size(), border_mode, constant_border_value);
+}