about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2018-11-28 14:59:47 +0000
committer Michalis Spyrou <michalis.spyrou@arm.com> 2018-11-30 15:15:07 +0000
commit 95abfddfa08ab85d4f88c6f4d2e077969178f2d5 (patch)
tree dd6124e42635423cf01b4b9face5c983c67cb0e4
parent eb6aad7e1eee0c0659ea71444ed4093c52267af3 (diff)
download ComputeLibrary-95abfddfa08ab85d4f88c6f4d2e077969178f2d5.tar.gz
COMPMID-1815 Remove templates from NEFillBorderKernel and create INESimpleFunctionNoBorder
Change-Id: Ia9fdc75b23e9a6208058f8406fb7b5fcd917de2c
Reviewed-on: https://review.mlplatform.org/311
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r-- arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 2
-rw-r--r-- arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h | 48
-rw-r--r-- arm_compute/runtime/NEON/functions/NEAccumulate.h | 12
-rw-r--r-- arm_compute/runtime/NEON/functions/NEActivationLayer.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEChannelCombine.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEChannelExtract.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NECol2Im.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEColorConvert.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NECopy.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h | 3
-rw-r--r-- arm_compute/runtime/NEON/functions/NEFlattenLayer.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NEFloor.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 2
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEHOGDetector.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEIm2Col.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 1
-rw-r--r-- arm_compute/runtime/NEON/functions/NEMagnitude.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEPermute.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEPhase.h | 10
-rw-r--r-- arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NERNNLayer.h | 1
-rw-r--r-- arm_compute/runtime/NEON/functions/NEReorgLayer.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NEReshapeLayer.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NETableLookup.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NEThreshold.h | 8
-rw-r--r-- arm_compute/runtime/NEON/functions/NETranspose.h | 6
-rw-r--r-- arm_compute/runtime/NEON/functions/NEYOLOLayer.h | 4
-rw-r--r-- src/core/NEON/kernels/NEFillBorderKernel.cpp | 160
-rw-r--r-- src/runtime/NEON/INESimpleFunctionNoBorder.cpp | 39
38 files changed, 236 insertions, 218 deletions
diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
index cff6b4ea2d..4769cfa121 100644
--- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
+++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
@@ -69,9 +69,7 @@ public:
void run(const Window &window, const ThreadInfo &info) override;
private:
- template <typename T>
void fill_replicate_single_channel(const Window &window);
- template <typename T>
void fill_constant_value_single_channel(const Window &window);
ITensor *_tensor;
diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
new file mode 100644
index 0000000000..6765b5f937
--- /dev/null
+++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__
+#define __ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic interface for functions which have a single NEON kernel and no border */
+class INESimpleFunctionNoBorder : public IFunction
+{
+public:
+ /** Constructor */
+ INESimpleFunctionNoBorder();
+
+ // Inherited methods overridden:
+ void run() override final;
+
+protected:
+ std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h
index de532c37a0..36b8bec3aa 100644
--- a/arm_compute/runtime/NEON/functions/NEAccumulate.h
+++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEACCUMULATE_H__
#define __ARM_COMPUTE_NEACCUMULATE_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include <cstdint>
@@ -33,7 +33,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEAccumulateKernel */
-class NEAccumulate : public INESimpleFunction
+class NEAccumulate : public INESimpleFunctionNoBorder
{
public:
/** Set the input and accumulation tensors
@@ -45,7 +45,7 @@ public:
};
/** Basic function to run @ref NEAccumulateWeightedKernel */
-class NEAccumulateWeighted : public INESimpleFunction
+class NEAccumulateWeighted : public INESimpleFunctionNoBorder
{
public:
/** Set the input and accumulation tensors, and the scale value
@@ -59,7 +59,7 @@ public:
};
/** Basic function to run @ref NEAccumulateSquaredKernel */
-class NEAccumulateSquared : public INESimpleFunction
+class NEAccumulateSquared : public INESimpleFunctionNoBorder
{
public:
/** Set the input and accumulation tensors and the shift value.
@@ -70,5 +70,5 @@ public:
*/
void configure(const ITensor *input, uint32_t shift, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEACCUMULATE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index a65146d461..588de04332 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEACTIVATIONLAYER_H__
#define __ARM_COMPUTE_NEACTIVATIONLAYER_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/Types.h"
@@ -36,7 +36,7 @@ class ITensor;
*
* @note The function simulates an activation layer with the specified activation function.
*/
-class NEActivationLayer : public INESimpleFunction
+class NEActivationLayer : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensor.
@@ -60,5 +60,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
index 0250293e97..bdcbaba3fa 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,14 @@
#ifndef __ARM_COMPUTE_NEBITWISEAND_H__
#define __ARM_COMPUTE_NEBITWISEAND_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEBitwiseAndKernel */
-class NEBitwiseAnd : public INESimpleFunction
+class NEBitwiseAnd : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output
@@ -42,5 +42,5 @@ public:
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBITWISEAND_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
index 62c08ffcf9..c2321a89b8 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,14 @@
#ifndef __ARM_COMPUTE_NEBITWISENOT_H__
#define __ARM_COMPUTE_NEBITWISENOT_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEBitwiseNotKernel */
-class NEBitwiseNot : public INESimpleFunction
+class NEBitwiseNot : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's input and output
@@ -41,5 +41,5 @@ public:
*/
void configure(const ITensor *input, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBITWISENOT_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
index 1c9a2f9d2e..689329ff42 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,14 @@
#ifndef __ARM_COMPUTE_NEBITWISEOR_H__
#define __ARM_COMPUTE_NEBITWISEOR_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEBitwiseOrKernel */
-class NEBitwiseOr : public INESimpleFunction
+class NEBitwiseOr : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output
@@ -42,5 +42,5 @@ public:
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBITWISEOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
index 4690f0a4e3..cc9f1ed47a 100644
--- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
+++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,14 @@
#ifndef __ARM_COMPUTE_NEBITWISEXOR_H__
#define __ARM_COMPUTE_NEBITWISEXOR_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEBitwiseXorKernel */
-class NEBitwiseXor : public INESimpleFunction
+class NEBitwiseXor : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output
@@ -42,5 +42,5 @@ public:
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEBITWISEXOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h
index 7133553e1d..b5942b2eb7 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NECHANNELCOMBINE_H__
#define __ARM_COMPUTE_NECHANNELCOMBINE_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -33,7 +33,7 @@ class ITensor;
using IImage = ITensor;
/**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */
-class NEChannelCombine : public INESimpleFunction
+class NEChannelCombine : public INESimpleFunctionNoBorder
{
public:
/** Initialize function's inputs and outputs.
@@ -54,5 +54,5 @@ public:
*/
void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NECHANNELCOMBINE_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h
index 1620d3ad1b..ad2bd535e9 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#define __ARM_COMPUTE_NECHANNELEXTRACT_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -34,7 +34,7 @@ class ITensor;
using IImage = ITensor;
/**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */
-class NEChannelExtract : public INESimpleFunction
+class NEChannelExtract : public INESimpleFunctionNoBorder
{
public:
/** Initialize the function's source, destination
@@ -52,5 +52,5 @@ public:
*/
void configure(const IMultiImage *input, Channel channel, IImage *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NECHANNELEXTRACT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index 20fe483604..0a140d6791 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NECHANNELSHUFFLELAYER_H__
#define __ARM_COMPUTE_NECHANNELSHUFFLELAYER_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -37,7 +37,7 @@ class ITensor;
* first divide the channels into G groups, C = (G * C'), and perform a transpose of the channel, which gives C = (C' * G).
* for more details see: https://arxiv.org/pdf/1707.01083.pdf
*/
-class NEChannelShuffleLayer : public INESimpleFunction
+class NEChannelShuffleLayer : public INESimpleFunctionNoBorder
{
public:
/** Initialize the function
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
index 42876a8aec..64ce9944e2 100644
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ b/arm_compute/runtime/NEON/functions/NECol2Im.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NECOL2IM_H__
#define __ARM_COMPUTE_NECOL2IM_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Types.h"
@@ -34,7 +34,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NECol2Im */
-class NECol2Im : public INESimpleFunction
+class NECol2Im : public INESimpleFunctionNoBorder
{
public:
/** Configure the col2im NEON kernel
@@ -56,5 +56,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NECOL2IM_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h
index 73eb3f94ea..a3dd064d06 100644
--- a/arm_compute/runtime/NEON/functions/NEColorConvert.h
+++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NECOLORCONVERT_H__
#define __ARM_COMPUTE_NECOLORCONVERT_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -33,7 +33,7 @@ class IMultiImage;
using IImage = ITensor;
/**Basic function to run @ref NEColorConvertKernel to perform color conversion */
-class NEColorConvert : public INESimpleFunction
+class NEColorConvert : public INESimpleFunctionNoBorder
{
public:
/** Initialize the function's source, destination
@@ -63,5 +63,5 @@ public:
*/
void configure(const IMultiImage *input, IMultiImage *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index df06b1ddbb..c476da54eb 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -25,14 +25,14 @@
#define __ARM_COMPUTE_NECOPY_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NECopyKernel */
-class NECopy : public INESimpleFunction
+class NECopy : public INESimpleFunctionNoBorder
{
public:
/** Initialise the function's source and destination.
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index 1fdad30115..ebb9530c71 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -25,7 +25,7 @@
#define __ARM_COMPUTE_NEDEPTHCONVERT_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include <cstdint>
@@ -34,7 +34,7 @@ namespace arm_compute
class ITensor;
/**Basic function to run @ref NEDepthConvertLayerKernel */
-class NEDepthConvertLayer : public INESimpleFunction
+class NEDepthConvertLayer : public INESimpleFunctionNoBorder
{
public:
/* Contructor */
@@ -69,5 +69,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
index 99e93ccece..1281238be9 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
@@ -26,7 +26,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"
@@ -76,5 +75,5 @@ private:
NEDepthwiseConvolutionLayer _depthwise_conv;
NEDirectConvolutionLayer _pointwise_conv;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEON_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index 26d7c7f636..3365b3570b 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -25,14 +25,14 @@
#define __ARM_COMPUTE_NEFLATTENLAYER_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to execute flatten layer kernel. */
-class NEFlattenLayer : public INESimpleFunction
+class NEFlattenLayer : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's input and output.
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 92aa994ee3..630a7fcd0a 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEFLOOR_H__
#define __ARM_COMPUTE_NEFLOOR_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/Types.h"
@@ -33,7 +33,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEFloorKernel */
-class NEFloor : public INESimpleFunction
+class NEFloor : public INESimpleFunctionNoBorder
{
public:
/** Set the source, destination of the kernel
@@ -51,5 +51,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEFLOOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 9c9074ceec..56ce274572 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -44,7 +44,7 @@ namespace arm_compute
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
-class NEFullyConnectedLayerReshapeWeights : public INESimpleFunction
+class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
index 4a6bec03e6..4d7f67b949 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__
#define __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -35,7 +35,7 @@ class ITensor;
* -# @ref NEGEMMInterleave4x4Kernel
*
*/
-class NEGEMMInterleave4x4 : public INESimpleFunction
+class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs, output
@@ -45,5 +45,5 @@ public:
*/
void configure(const ITensor *input, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index 53b91b35b6..77bfb98ba6 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H__
#define __ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
/** This file contains all available output stages for GEMMLowp on NEON.
*
@@ -56,7 +56,7 @@ class ITensor;
* @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
* after the result is shifted right by result_shift
*/
-class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunction
+class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs, output
@@ -116,7 +116,7 @@ public:
* @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
* after the result is shifted right by result_shift
*/
-class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunction
+class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs, output
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
index 3f8e731d01..b44c5a3ee3 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
@@ -24,16 +24,18 @@
#ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__
#define __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
+class ITensor;
+
/** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels:
*
* -# @ref NEGEMMTranspose1xWKernel
*
*/
-class NEGEMMTranspose1xW : public INESimpleFunction
+class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs, output
@@ -51,5 +53,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
index 98b8a89bc1..f41e49b1ab 100644
--- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h
+++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,7 +26,7 @@
#include "arm_compute/core/IHOG.h"
#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -35,7 +35,7 @@ namespace arm_compute
* -# @ref NEHOGDetectorKernel
*
*/
-class NEHOGDetector : public INESimpleFunction
+class NEHOGDetector : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
@@ -52,6 +52,6 @@ public:
*/
void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEHOGDETECTOR_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index de4780f8f0..e281dce0b1 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEIM2COL_H__
#define __ARM_COMPUTE_NEIM2COL_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/Size2D.h"
@@ -78,5 +78,5 @@ private:
NEIm2ColKernel _kernel;
unsigned int _y_dim;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEIM2COL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index 9c4ab2b068..b98e74d969 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -31,7 +31,6 @@
#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h
index 6aabe9dfa4..9fe043b38f 100644
--- a/arm_compute/runtime/NEON/functions/NEMagnitude.h
+++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h
@@ -24,14 +24,14 @@
#ifndef __ARM_COMPUTE_NEMAGNITUDE_H__
#define __ARM_COMPUTE_NEMAGNITUDE_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
-/** Basic function to run NEMagnitudePhaseKernel */
-class NEMagnitude : public INESimpleFunction
+/** Basic function to run @ref NEMagnitudePhaseKernel */
+class NEMagnitude : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs.
@@ -43,5 +43,5 @@ public:
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEMAGNITUDE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 580d24e415..43789e6c4a 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEPERMUTE_H__
#define __ARM_COMPUTE_NEPERMUTE_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/Types.h"
@@ -33,7 +33,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEPermuteKernel */
-class NEPermute : public INESimpleFunction
+class NEPermute : public INESimpleFunctionNoBorder
{
public:
/** Configure the permute NEON kernel
@@ -57,5 +57,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEPERMUTE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h
index cd62cf98e8..d096cf82e0 100644
--- a/arm_compute/runtime/NEON/functions/NEPhase.h
+++ b/arm_compute/runtime/NEON/functions/NEPhase.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,14 @@
#ifndef __ARM_COMPUTE_NEPHASE_H__
#define __ARM_COMPUTE_NEPHASE_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
-/** Basic function to run NEMagnitudePhaseKernel */
-class NEPhase : public INESimpleFunction
+/** Basic function to run @ref NEMagnitudePhaseKernel */
+class NEPhase : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs, output.
@@ -43,5 +43,5 @@ public:
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEPHASE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
index 34ba39d960..a7a2034777 100644
--- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h
@@ -26,14 +26,14 @@
#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEPriorBoxLayerKernel. */
-class NEPriorBoxLayer : public INESimpleFunction
+class NEPriorBoxLayer : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensors.
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index bdba42d6ba..ec394392de 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -27,7 +27,6 @@
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index a73752832b..716f164cc7 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -25,7 +25,7 @@
#define __ARM_COMPUTE_NEREORGLAYER_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -33,7 +33,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEReorgLayerKernel */
-class NEReorgLayer : public INESimpleFunction
+class NEReorgLayer : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and outputs
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 01fe3bd091..8896b4f5c6 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -25,14 +25,14 @@
#define __ARM_COMPUTE_NERESHAPELAYER_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEReshapeLayerKernel */
-class NEReshapeLayer : public INESimpleFunction
+class NEReshapeLayer : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and outputs
@@ -51,5 +51,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NERESHAPELAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
index b59ffb877c..2a49aee3e7 100644
--- a/arm_compute/runtime/NEON/functions/NETableLookup.h
+++ b/arm_compute/runtime/NEON/functions/NETableLookup.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NETABLELOOKUP_H__
#define __ARM_COMPUTE_NETABLELOOKUP_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -32,7 +32,7 @@ class ITensor;
class ILut;
/** Basic function to run @ref NETableLookupKernel */
-class NETableLookup : public INESimpleFunction
+class NETableLookup : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output
@@ -43,5 +43,5 @@ public:
*/
void configure(const ITensor *input, const ILut *lut, ITensor *output);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NETABLELOOKUP_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h
index d407ee5b15..69a437b68c 100644
--- a/arm_compute/runtime/NEON/functions/NEThreshold.h
+++ b/arm_compute/runtime/NEON/functions/NEThreshold.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#define __ARM_COMPUTE_NETHRESHOLD_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include <cstdint>
@@ -34,7 +34,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEThresholdKernel */
-class NEThreshold : public INESimpleFunction
+class NEThreshold : public INESimpleFunctionNoBorder
{
public:
/** Initialise the function's source, destination, thresholds and threshold type
@@ -50,5 +50,5 @@ public:
void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0,
ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NETHRESHOLD_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 0234288b4b..08ee3a6d8d 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -25,7 +25,7 @@
#define __ARM_COMPUTE_NETRANSPOSE_H__
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
namespace arm_compute
{
@@ -36,7 +36,7 @@ class ITensor;
* -# @ref NETransposeKernel
*
*/
-class NETranspose : public INESimpleFunction
+class NETranspose : public INESimpleFunctionNoBorder
{
public:
/** Initialise the kernel's inputs and output
@@ -54,6 +54,6 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_NETRANSPOSE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
index e09dd42f8f..0adc0f1d9a 100644
--- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
@@ -24,7 +24,7 @@
#ifndef __ARM_COMPUTE_NEYOLOLAYER_H__
#define __ARM_COMPUTE_NEYOLOLAYER_H__
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
#include "arm_compute/core/Types.h"
@@ -34,7 +34,7 @@ namespace arm_compute
class ITensor;
/** Basic function to run @ref NEYOLOLayerKernel */
-class NEYOLOLayer : public INESimpleFunction
+class NEYOLOLayer : public INESimpleFunctionNoBorder
{
public:
/** Set the input and output tensor.
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index aef4d4865a..39bcd996f9 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -34,15 +34,12 @@
#include <algorithm>
#include <cstdint>
-using namespace arm_compute;
-
+namespace arm_compute
+{
+class Coordinates;
namespace
{
-template <typename T, unsigned int leftx, unsigned int rightx>
-void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value);
-
-template <>
-inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value)
+inline void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value)
{
float border_value;
constant_border_value.get(border_value);
@@ -93,11 +90,6 @@ inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *t
}
} // namespace
-namespace arm_compute
-{
-class Coordinates;
-} // namespace arm_compute
-
NEFillBorderKernel::NEFillBorderKernel()
: _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast<float>(0.f))
{
@@ -142,81 +134,19 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
{
case BorderMode::CONSTANT:
{
- switch(_tensor->info()->data_type())
+ if(_border_size.left == 1 && _border_size.top == 1 && _tensor->info()->data_type() == DataType::F32)
{
- case DataType::QASYMM8:
- case DataType::U8:
- fill_constant_value_single_channel<uint8_t>(window);
- break;
- case DataType::S8:
- fill_constant_value_single_channel<int8_t>(window);
- break;
- case DataType::U16:
- fill_constant_value_single_channel<uint16_t>(window);
- break;
- case DataType::S16:
- fill_constant_value_single_channel<int16_t>(window);
- break;
- case DataType::U32:
- fill_constant_value_single_channel<uint32_t>(window);
- break;
- case DataType::S32:
- fill_constant_value_single_channel<int32_t>(window);
- break;
- case DataType::F16:
- static_assert(sizeof(half) == 2, "Float16_t must be 16 bit");
- fill_constant_value_single_channel<half>(window);
- break;
- case DataType::F32:
- static_assert(sizeof(float) == 4, "Float must be 32 bit");
- if(_border_size.left == 1 && _border_size.top == 1)
- {
- fill_constant_value_single_channel_special<float, 1u, 1u>(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value);
- }
- else
- {
- fill_constant_value_single_channel<float>(window);
- }
- break;
- default:
- ARM_COMPUTE_ERROR("Not handled");
+ fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value);
+ }
+ else
+ {
+ fill_constant_value_single_channel(window);
}
break;
}
case BorderMode::REPLICATE:
{
- switch(_tensor->info()->data_type())
- {
- case DataType::QASYMM8:
- case DataType::U8:
- fill_replicate_single_channel<uint8_t>(window);
- break;
- case DataType::S8:
- fill_replicate_single_channel<int8_t>(window);
- break;
- case DataType::U16:
- fill_replicate_single_channel<uint16_t>(window);
- break;
- case DataType::S16:
- fill_replicate_single_channel<int16_t>(window);
- break;
- case DataType::U32:
- fill_replicate_single_channel<uint32_t>(window);
- break;
- case DataType::S32:
- fill_replicate_single_channel<int32_t>(window);
- break;
- case DataType::F16:
- static_assert(sizeof(half) == 2, "Float16_t must be 16 bit");
- fill_replicate_single_channel<half>(window);
- break;
- case DataType::F32:
- static_assert(sizeof(float) == 4, "Float must be 32 bit");
- fill_replicate_single_channel<float>(window);
- break;
- default:
- ARM_COMPUTE_ERROR("Not handled");
- }
+ fill_replicate_single_channel(window);
break;
}
case BorderMode::UNDEFINED:
@@ -226,13 +156,12 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
}
}
-template <typename T>
void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
{
uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
const size_t width = _tensor->info()->valid_region().shape[0];
const size_t height = _tensor->info()->valid_region().shape[1];
-
+ const size_t element_size = _tensor->info()->element_size();
// Left and right border
Window vertical(window);
vertical.set(Window::DimY, Window::Dimension(0, height, 1));
@@ -241,13 +170,17 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
execute_window_loop(vertical, [&](const Coordinates & id)
{
- const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
- const auto left_val = *reinterpret_cast<T *>(vertical_it.ptr());
- const auto right_val = *(reinterpret_cast<T *>(vertical_it.ptr()) + width - 1);
-
+ uint8_t *base_addr = start_valid_region + vertical_it.offset();
// Fill left and right borders
- std::fill_n(row_start - _border_size.left, _border_size.left, left_val);
- std::fill_n(row_start + width, _border_size.right, right_val);
+ for(unsigned int i = 0; i < _border_size.left; ++i)
+ {
+ std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, vertical_it.ptr(), element_size);
+ }
+
+ for(unsigned int i = 0; i < _border_size.right; ++i)
+ {
+ std::memcpy(base_addr + (width + i) * element_size, vertical_it.ptr() + (width - 1) * element_size, element_size);
+ }
},
vertical_it);
@@ -257,41 +190,33 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
// Iterate over all XY planes
execute_window_loop(window, [&](const Coordinates & id)
{
- const auto first_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset());
-
+ uint8_t *base_addr = start_valid_region + plane_it.offset();
// Top border
for(int i = -_border_size.top; i < 0; ++i)
{
- const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
-
// Copy top rows including left/right borders
- std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
+ std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size,
+ base_addr - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size);
}
- const auto last_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]);
-
// Bottom border
for(unsigned int i = height; i < height + _border_size.bottom; ++i)
{
- const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
-
// Copy bottom rows including left/right borders
- std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
+ std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size,
+ base_addr + (height - 1) * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size);
}
},
plane_it);
}
-template <typename T>
void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
{
- T constant_border_value;
- _constant_border_value.get(constant_border_value);
-
uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
const size_t width = _tensor->info()->valid_region().shape[0];
const size_t height = _tensor->info()->valid_region().shape[1];
const int stridey = _tensor->info()->strides_in_bytes()[1];
+ const size_t element_size = _tensor->info()->element_size();
// Left and right border
Window vertical(window);
@@ -301,11 +226,17 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window
execute_window_loop(vertical, [&](const Coordinates & id)
{
- const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
-
+ uint8_t *base_addr = start_valid_region + vertical_it.offset();
// Fill left and right borders
- std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value);
- std::fill_n(row_start + width, _border_size.right, constant_border_value);
+ for(unsigned int i = 0; i < _border_size.left; ++i)
+ {
+ std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, &_constant_border_value, element_size);
+ }
+
+ for(unsigned int i = 0; i < _border_size.right; ++i)
+ {
+ std::memcpy(base_addr + (width + i) * element_size, &_constant_border_value, element_size);
+ }
},
vertical_it);
@@ -319,21 +250,24 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window
// Top border
for(int i = -_border_size.top; i < 0; ++i)
{
- const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey);
-
// Fill top rows including left/right borders
- std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
+ for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
+ {
+ std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size);
+ }
}
// Bottom border
const unsigned low_border_size = height + _border_size.bottom;
for(unsigned int i = height; i < low_border_size; ++i)
{
- const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey);
-
// Fill bottom rows including left/right borders
- std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
+ for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
+ {
+ std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size);
+ }
}
},
plane_it);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp
new file mode 100644
index 0000000000..12872048c7
--- /dev/null
+++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+namespace arm_compute
+{
+INESimpleFunctionNoBorder::INESimpleFunctionNoBorder() // NOLINT
+ : _kernel()
+{
+}
+
+void INESimpleFunctionNoBorder::run()
+{
+ NEScheduler::get().schedule(_kernel.get(), Window::DimY);
+}
+} // namespace arm_compute