diff options
38 files changed, 236 insertions, 218 deletions
diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h index cff6b4ea2d..4769cfa121 100644 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -69,9 +69,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: - template <typename T> void fill_replicate_single_channel(const Window &window); - template <typename T> void fill_constant_value_single_channel(const Window &window); ITensor *_tensor; diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h new file mode 100644 index 0000000000..6765b5f937 --- /dev/null +++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__ +#define __ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic interface for functions which have a single NEON kernel and no border */ +class INESimpleFunctionNoBorder : public IFunction +{ +public: + /** Constructor */ + INESimpleFunctionNoBorder(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr<INEKernel> _kernel; /**< Kernel to run */ +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h index de532c37a0..36b8bec3aa 100644 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEACCUMULATE_H__ #define __ARM_COMPUTE_NEACCUMULATE_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include <cstdint> @@ -33,7 +33,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEAccumulateKernel */ -class NEAccumulate : public INESimpleFunction +class NEAccumulate : public INESimpleFunctionNoBorder { public: /** Set the input and accumulation tensors @@ -45,7 +45,7 @@ public: }; /** Basic function to run @ref NEAccumulateWeightedKernel */ -class NEAccumulateWeighted : public INESimpleFunction +class NEAccumulateWeighted : public INESimpleFunctionNoBorder { public: /** Set the input and accumulation tensors, and the scale value @@ -59,7 +59,7 @@ public: }; /** Basic function to run @ref NEAccumulateSquaredKernel */ -class NEAccumulateSquared : public INESimpleFunction +class NEAccumulateSquared : public INESimpleFunctionNoBorder { public: /** Set the input and accumulation tensors and the shift value. @@ -70,5 +70,5 @@ public: */ void configure(const ITensor *input, uint32_t shift, ITensor *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEACCUMULATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index a65146d461..588de04332 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEACTIVATIONLAYER_H__ #define __ARM_COMPUTE_NEACTIVATIONLAYER_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" @@ -36,7 +36,7 @@ class ITensor; * * @note The function simulates an activation layer with the specified activation function. */ -class NEActivationLayer : public INESimpleFunction +class NEActivationLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensor. @@ -60,5 +60,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index 0250293e97..bdcbaba3fa 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_NEBITWISEAND_H__ #define __ARM_COMPUTE_NEBITWISEAND_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NEBitwiseAndKernel */ -class NEBitwiseAnd : public INESimpleFunction +class NEBitwiseAnd : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output @@ -42,5 +42,5 @@ public: */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEBITWISEAND_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index 62c08ffcf9..c2321a89b8 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_NEBITWISENOT_H__ #define __ARM_COMPUTE_NEBITWISENOT_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NEBitwiseNotKernel */ -class NEBitwiseNot : public INESimpleFunction +class NEBitwiseNot : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's input and output @@ -41,5 +41,5 @@ public: */ void configure(const ITensor *input, ITensor *output); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEBITWISENOT_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index 1c9a2f9d2e..689329ff42 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_NEBITWISEOR_H__ #define __ARM_COMPUTE_NEBITWISEOR_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NEBitwiseOrKernel */ -class NEBitwiseOr : public INESimpleFunction +class NEBitwiseOr : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output @@ -42,5 +42,5 @@ public: */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEBITWISEOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index 4690f0a4e3..cc9f1ed47a 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_NEBITWISEXOR_H__ #define __ARM_COMPUTE_NEBITWISEXOR_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NEBitwiseXorKernel */ -class NEBitwiseXor : public INESimpleFunction +class NEBitwiseXor : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output @@ -42,5 +42,5 @@ public: */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEBITWISEXOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h index 7133553e1d..b5942b2eb7 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NECHANNELCOMBINE_H__ #define __ARM_COMPUTE_NECHANNELCOMBINE_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -33,7 +33,7 @@ class ITensor; using IImage = ITensor; /**Basic function to run @ref NEChannelCombineKernel to perform channel combination. */ -class NEChannelCombine : public INESimpleFunction +class NEChannelCombine : public INESimpleFunctionNoBorder { public: /** Initialize function's inputs and outputs. @@ -54,5 +54,5 @@ public: */ void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NECHANNELCOMBINE_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h index 1620d3ad1b..ad2bd535e9 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define __ARM_COMPUTE_NECHANNELEXTRACT_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -34,7 +34,7 @@ class ITensor; using IImage = ITensor; /**Basic function to run @ref NEChannelExtractKernel to perform channel extraction. */ -class NEChannelExtract : public INESimpleFunction +class NEChannelExtract : public INESimpleFunctionNoBorder { public: /** Initialize the function's source, destination @@ -52,5 +52,5 @@ public: */ void configure(const IMultiImage *input, Channel channel, IImage *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NECHANNELEXTRACT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index 20fe483604..0a140d6791 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NECHANNELSHUFFLELAYER_H__ #define __ARM_COMPUTE_NECHANNELSHUFFLELAYER_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -37,7 +37,7 @@ class ITensor; * first divide the channels into G groups, C = (G * C'), and perform a transpose of the channel, which gives C = (C' * G). * for more details see: https://arxiv.org/pdf/1707.01083.pdf */ -class NEChannelShuffleLayer : public INESimpleFunction +class NEChannelShuffleLayer : public INESimpleFunctionNoBorder { public: /** Initialize the function diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h index 42876a8aec..64ce9944e2 100644 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ b/arm_compute/runtime/NEON/functions/NECol2Im.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NECOL2IM_H__ #define __ARM_COMPUTE_NECOL2IM_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" @@ -34,7 +34,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NECol2Im */ -class NECol2Im : public INESimpleFunction +class NECol2Im : public INESimpleFunctionNoBorder { public: /** Configure the col2im NEON kernel @@ -56,5 +56,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NECOL2IM_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h index 73eb3f94ea..a3dd064d06 100644 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NECOLORCONVERT_H__ #define __ARM_COMPUTE_NECOLORCONVERT_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -33,7 +33,7 @@ class IMultiImage; using IImage = ITensor; /**Basic function to run @ref NEColorConvertKernel to perform color conversion */ -class NEColorConvert : public INESimpleFunction +class NEColorConvert : public INESimpleFunctionNoBorder { public: /** Initialize the function's source, destination @@ -63,5 +63,5 @@ public: */ void configure(const IMultiImage *input, IMultiImage *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NECOLORCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index df06b1ddbb..c476da54eb 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -25,14 +25,14 @@ #define __ARM_COMPUTE_NECOPY_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NECopyKernel */ -class NECopy : public INESimpleFunction +class NECopy : public INESimpleFunctionNoBorder { public: /** Initialise the function's source and destination. diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index 1fdad30115..ebb9530c71 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -25,7 +25,7 @@ #define __ARM_COMPUTE_NEDEPTHCONVERT_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include <cstdint> @@ -34,7 +34,7 @@ namespace arm_compute class ITensor; /**Basic function to run @ref NEDepthConvertLayerKernel */ -class NEDepthConvertLayer : public INESimpleFunction +class NEDepthConvertLayer : public INESimpleFunctionNoBorder { public: /* Contructor */ @@ -69,5 +69,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h index 99e93ccece..1281238be9 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -76,5 +75,5 @@ private: NEDepthwiseConvolutionLayer _depthwise_conv; NEDirectConvolutionLayer _pointwise_conv; }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEON_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 26d7c7f636..3365b3570b 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -25,14 +25,14 @@ #define __ARM_COMPUTE_NEFLATTENLAYER_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to execute flatten layer kernel. */ -class NEFlattenLayer : public INESimpleFunction +class NEFlattenLayer : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's input and output. diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index 92aa994ee3..630a7fcd0a 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEFLOOR_H__ #define __ARM_COMPUTE_NEFLOOR_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" @@ -33,7 +33,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEFloorKernel */ -class NEFloor : public INESimpleFunction +class NEFloor : public INESimpleFunctionNoBorder { public: /** Set the source, destination of the kernel @@ -51,5 +51,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEFLOOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 9c9074ceec..56ce274572 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -44,7 +44,7 @@ namespace arm_compute * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. */ -class NEFullyConnectedLayerReshapeWeights : public INESimpleFunction +class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h index 4a6bec03e6..4d7f67b949 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ #define __ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -35,7 +35,7 @@ class ITensor; * -# @ref NEGEMMInterleave4x4Kernel * */ -class NEGEMMInterleave4x4 : public INESimpleFunction +class NEGEMMInterleave4x4 : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs, output @@ -45,5 +45,5 @@ public: */ void configure(const ITensor *input, ITensor *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEGEMMINTERLEAVE4X4_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index 53b91b35b6..77bfb98ba6 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H__ #define __ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" /** This file contains all available output stages for GEMMLowp on NEON. * @@ -56,7 +56,7 @@ class ITensor; * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions * after the result is shifted right by result_shift */ -class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunction +class NEGEMMLowpQuantizeDownInt32ToUint8Scale : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs, output @@ -116,7 +116,7 @@ public: * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions * after the result is shifted right by result_shift */ -class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunction +class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs, output diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 3f8e731d01..b44c5a3ee3 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -24,16 +24,18 @@ #ifndef __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ #define __ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { +class ITensor; + /** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels: * * -# @ref NEGEMMTranspose1xWKernel * */ -class NEGEMMTranspose1xW : public INESimpleFunction +class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs, output @@ -51,5 +53,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h index 98b8a89bc1..f41e49b1ab 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,7 @@ #include "arm_compute/core/IHOG.h" #include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -35,7 +35,7 @@ namespace arm_compute * -# @ref NEHOGDetectorKernel * */ -class NEHOGDetector : public INESimpleFunction +class NEHOGDetector : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class @@ -52,6 +52,6 @@ public: */ void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEHOGDETECTOR_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index de4780f8f0..e281dce0b1 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEIM2COL_H__ #define __ARM_COMPUTE_NEIM2COL_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/Size2D.h" @@ -78,5 +78,5 @@ private: NEIm2ColKernel _kernel; unsigned int _y_dim; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEIM2COL_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index 9c4ab2b068..b98e74d969 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -31,7 +31,6 @@ #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h index 6aabe9dfa4..9fe043b38f 100644 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_NEMAGNITUDE_H__ #define __ARM_COMPUTE_NEMAGNITUDE_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; -/** Basic function to run NEMagnitudePhaseKernel */ -class NEMagnitude : public INESimpleFunction +/** Basic function to run @ref NEMagnitudePhaseKernel */ +class NEMagnitude : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs. @@ -43,5 +43,5 @@ public: */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEMAGNITUDE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 580d24e415..43789e6c4a 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEPERMUTE_H__ #define __ARM_COMPUTE_NEPERMUTE_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" @@ -33,7 +33,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEPermuteKernel */ -class NEPermute : public INESimpleFunction +class NEPermute : public INESimpleFunctionNoBorder { public: /** Configure the permute NEON kernel @@ -57,5 +57,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NEPERMUTE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h index cd62cf98e8..d096cf82e0 100644 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ b/arm_compute/runtime/NEON/functions/NEPhase.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,14 @@ #ifndef __ARM_COMPUTE_NEPHASE_H__ #define __ARM_COMPUTE_NEPHASE_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; -/** Basic function to run NEMagnitudePhaseKernel */ -class NEPhase : public INESimpleFunction +/** Basic function to run @ref NEMagnitudePhaseKernel */ +class NEPhase : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs, output. @@ -43,5 +43,5 @@ public: */ void configure(const ITensor *input1, const ITensor *input2, ITensor *output, PhaseType phase_type = PhaseType::SIGNED); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NEPHASE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index 34ba39d960..a7a2034777 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -26,14 +26,14 @@ #include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NEPriorBoxLayerKernel. */ -class NEPriorBoxLayer : public INESimpleFunction +class NEPriorBoxLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index bdba42d6ba..ec394392de 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -27,7 +27,6 @@ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index a73752832b..716f164cc7 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -25,7 +25,7 @@ #define __ARM_COMPUTE_NEREORGLAYER_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -33,7 +33,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEReorgLayerKernel */ -class NEReorgLayer : public INESimpleFunction +class NEReorgLayer : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and outputs diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index 01fe3bd091..8896b4f5c6 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -25,14 +25,14 @@ #define __ARM_COMPUTE_NERESHAPELAYER_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; /** Basic function to run @ref NEReshapeLayerKernel */ -class NEReshapeLayer : public INESimpleFunction +class NEReshapeLayer : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and outputs @@ -51,5 +51,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NERESHAPELAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h index b59ffb877c..2a49aee3e7 100644 --- a/arm_compute/runtime/NEON/functions/NETableLookup.h +++ b/arm_compute/runtime/NEON/functions/NETableLookup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NETABLELOOKUP_H__ #define __ARM_COMPUTE_NETABLELOOKUP_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -32,7 +32,7 @@ class ITensor; class ILut; /** Basic function to run @ref NETableLookupKernel */ -class NETableLookup : public INESimpleFunction +class NETableLookup : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output @@ -43,5 +43,5 @@ public: */ void configure(const ITensor *input, const ILut *lut, ITensor *output); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NETABLELOOKUP_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h index d407ee5b15..69a437b68c 100644 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ b/arm_compute/runtime/NEON/functions/NEThreshold.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define __ARM_COMPUTE_NETHRESHOLD_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include <cstdint> @@ -34,7 +34,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEThresholdKernel */ -class NEThreshold : public INESimpleFunction +class NEThreshold : public INESimpleFunctionNoBorder { public: /** Initialise the function's source, destination, thresholds and threshold type @@ -50,5 +50,5 @@ public: void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_NETHRESHOLD_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 0234288b4b..08ee3a6d8d 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -25,7 +25,7 @@ #define __ARM_COMPUTE_NETRANSPOSE_H__ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -36,7 +36,7 @@ class ITensor; * -# @ref NETransposeKernel * */ -class NETranspose : public INESimpleFunction +class NETranspose : public INESimpleFunctionNoBorder { public: /** Initialise the kernel's inputs and output @@ -54,6 +54,6 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NETRANSPOSE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h index e09dd42f8f..0adc0f1d9a 100644 --- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h +++ b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h @@ -24,7 +24,7 @@ #ifndef __ARM_COMPUTE_NEYOLOLAYER_H__ #define __ARM_COMPUTE_NEYOLOLAYER_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" #include "arm_compute/core/Types.h" @@ -34,7 +34,7 @@ namespace arm_compute class ITensor; /** Basic function to run @ref NEYOLOLayerKernel */ -class NEYOLOLayer : public INESimpleFunction +class NEYOLOLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensor. diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index aef4d4865a..39bcd996f9 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -34,15 +34,12 @@ #include <algorithm> #include <cstdint> -using namespace arm_compute; - +namespace arm_compute +{ +class Coordinates; namespace { -template <typename T, unsigned int leftx, unsigned int rightx> -void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value); - -template <> -inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value) +inline void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value) { float border_value; constant_border_value.get(border_value); @@ -93,11 +90,6 @@ inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *t } } // namespace -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - NEFillBorderKernel::NEFillBorderKernel() : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast<float>(0.f)) { @@ -142,81 +134,19 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info) { case BorderMode::CONSTANT: { - switch(_tensor->info()->data_type()) + if(_border_size.left == 1 && _border_size.top == 1 && _tensor->info()->data_type() == DataType::F32) { - case DataType::QASYMM8: - case DataType::U8: - fill_constant_value_single_channel<uint8_t>(window); - break; - case DataType::S8: - fill_constant_value_single_channel<int8_t>(window); - break; - case DataType::U16: - fill_constant_value_single_channel<uint16_t>(window); - break; - case DataType::S16: - fill_constant_value_single_channel<int16_t>(window); - break; - case DataType::U32: - fill_constant_value_single_channel<uint32_t>(window); - break; - case DataType::S32: - fill_constant_value_single_channel<int32_t>(window); - break; - case DataType::F16: - static_assert(sizeof(half) == 2, "Float16_t must be 16 bit"); - fill_constant_value_single_channel<half>(window); - break; - case DataType::F32: - static_assert(sizeof(float) == 4, "Float must be 32 bit"); - if(_border_size.left == 1 && _border_size.top == 1) - { - fill_constant_value_single_channel_special<float, 1u, 1u>(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value); - } - else - { - fill_constant_value_single_channel<float>(window); - } - break; - default: - ARM_COMPUTE_ERROR("Not handled"); + fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value); + } + else + { + fill_constant_value_single_channel(window); } break; } case BorderMode::REPLICATE: { - switch(_tensor->info()->data_type()) - { - case DataType::QASYMM8: - case DataType::U8: - fill_replicate_single_channel<uint8_t>(window); - break; - case DataType::S8: - fill_replicate_single_channel<int8_t>(window); - break; - case DataType::U16: - fill_replicate_single_channel<uint16_t>(window); - break; - case DataType::S16: - fill_replicate_single_channel<int16_t>(window); - break; - case DataType::U32: - fill_replicate_single_channel<uint32_t>(window); - break; - case DataType::S32: - fill_replicate_single_channel<int32_t>(window); - break; - case DataType::F16: - static_assert(sizeof(half) == 2, "Float16_t must be 16 bit"); - fill_replicate_single_channel<half>(window); - break; - case DataType::F32: - static_assert(sizeof(float) == 4, "Float must be 32 bit"); - fill_replicate_single_channel<float>(window); - break; - default: - ARM_COMPUTE_ERROR("Not handled"); - } + fill_replicate_single_channel(window); break; } case BorderMode::UNDEFINED: @@ -226,13 +156,12 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info) } } -template <typename T> void NEFillBorderKernel::fill_replicate_single_channel(const Window &window) { uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor); const size_t width = _tensor->info()->valid_region().shape[0]; const size_t height = _tensor->info()->valid_region().shape[1]; - + const size_t element_size = _tensor->info()->element_size(); // Left and right border Window vertical(window); vertical.set(Window::DimY, Window::Dimension(0, height, 1)); @@ -241,13 +170,17 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window) execute_window_loop(vertical, [&](const Coordinates & id) { - const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset()); - const auto left_val = *reinterpret_cast<T *>(vertical_it.ptr()); - const auto right_val = *(reinterpret_cast<T *>(vertical_it.ptr()) + width - 1); - + uint8_t *base_addr = start_valid_region + vertical_it.offset(); // Fill left and right borders - std::fill_n(row_start - _border_size.left, _border_size.left, left_val); - std::fill_n(row_start + width, _border_size.right, right_val); + for(unsigned int i = 0; i < _border_size.left; ++i) + { + std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, vertical_it.ptr(), element_size); + } + + for(unsigned int i = 0; i < _border_size.right; ++i) + { + std::memcpy(base_addr + (width + i) * element_size, vertical_it.ptr() + (width - 1) * element_size, element_size); + } }, vertical_it); @@ -257,41 +190,33 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window) // Iterate over all XY planes execute_window_loop(window, [&](const Coordinates & id) { - const auto first_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset()); - + uint8_t *base_addr = start_valid_region + plane_it.offset(); // Top border for(int i = -_border_size.top; i < 0; ++i) { - const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]); - // Copy top rows including left/right borders - std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left); + std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, + base_addr - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size); } - const auto last_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]); - // Bottom border for(unsigned int i = height; i < height + _border_size.bottom; ++i) { - const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]); - // Copy bottom rows including left/right borders - std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left); + std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, + base_addr + (height - 1) * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size); } }, plane_it); } -template <typename T> void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window) { - T constant_border_value; - _constant_border_value.get(constant_border_value); - uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor); const size_t width = _tensor->info()->valid_region().shape[0]; const size_t height = _tensor->info()->valid_region().shape[1]; const int stridey = _tensor->info()->strides_in_bytes()[1]; + const size_t element_size = _tensor->info()->element_size(); // Left and right border Window vertical(window); @@ -301,11 +226,17 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window execute_window_loop(vertical, [&](const Coordinates & id) { - const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset()); - + uint8_t *base_addr = start_valid_region + vertical_it.offset(); // Fill left and right borders - std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value); - std::fill_n(row_start + width, _border_size.right, constant_border_value); + for(unsigned int i = 0; i < _border_size.left; ++i) + { + std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, &_constant_border_value, element_size); + } + + for(unsigned int i = 0; i < _border_size.right; ++i) + { + std::memcpy(base_addr + (width + i) * element_size, &_constant_border_value, element_size); + } }, vertical_it); @@ -319,21 +250,24 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window // Top border for(int i = -_border_size.top; i < 0; ++i) { - const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey); - // Fill top rows including left/right borders - std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value); + for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j) + { + std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size); + } } // Bottom border const unsigned low_border_size = height + _border_size.bottom; for(unsigned int i = height; i < low_border_size; ++i) { - const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey); - // Fill bottom rows including left/right borders - std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value); + for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j) + { + std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size); + } } }, plane_it); } +} // namespace arm_compute diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp new file mode 100644 index 0000000000..12872048c7 --- /dev/null +++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +namespace arm_compute +{ +INESimpleFunctionNoBorder::INESimpleFunctionNoBorder() // NOLINT + : _kernel() +{ +} + +void INESimpleFunctionNoBorder::run() +{ + NEScheduler::get().schedule(_kernel.get(), Window::DimY); +} +} // namespace arm_compute |