author    Michele Di Giorgio <michele.digiorgio@arm.com>    2021-03-09 14:09:08 +0000
committer Michele Di Giorgio <michele.digiorgio@arm.com>    2021-03-31 17:08:51 +0000
commit    33f41fabd30fb444aaa0cf3e65b61794d498d151 (patch)
tree      a381cff3096a3b05198b0cd311fee28e40fd5a4f /src
parent    5f91b5d7063462854b62d342f9d4e04ae647e9a6 (diff)
Fix trademarks throughout the codebase
Resolves: COMPMID-4299
Change-Id: Ie6a52c1371b9a2a7b5bb4f019ecd5e70a2008567
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5338
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/CL/cl_kernels/helpers_asymm.h | 4
-rw-r--r-- src/core/GPUTarget.cpp | 6
-rw-r--r-- src/core/NEON/INESimpleKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp | 4
-rw-r--r-- src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NECol2ImKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NECol2ImKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEFillBorderKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMLowpReductionKernel.h | 6
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEGatherKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NENormalizationLayerKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEPadLayerKernel.h | 5
-rw-r--r-- src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NERangeKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEReductionOperationKernel.cpp | 6
-rw-r--r-- src/core/NEON/kernels/NEReductionOperationKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NERemapKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEReorgLayerKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEReverseKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEStackLayerKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEStackLayerKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NETileKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 2
-rw-r--r-- src/core/NEON/kernels/NEWeightsReshapeKernel.h | 2
-rw-r--r-- src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h | 14
-rw-r--r-- src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp | 2
-rw-r--r-- src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp | 24
-rw-r--r-- src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp | 2
-rw-r--r-- src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp | 2
-rw-r--r-- src/core/NEON/kernels/detail/NEActivationFunctionDetail.h | 26
-rw-r--r-- src/core/NEON/wrapper/traits.h | 4
-rw-r--r-- src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp | 2
-rw-r--r-- src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp | 2
-rw-r--r-- src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp | 2
-rw-r--r-- src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp | 2
-rw-r--r-- src/core/cpu/kernels/CpuReshapeKernel.cpp | 2
-rw-r--r-- src/core/cpu/kernels/CpuTransposeKernel.cpp | 8
-rw-r--r-- src/core/cpu/kernels/activation/NEON/fp16.cpp | 2
-rw-r--r-- src/core/cpu/kernels/activation/NEON/fp32.cpp | 2
-rw-r--r-- src/core/cpu/kernels/add/neon/list.h | 2
-rw-r--r-- src/core/cpu/kernels/pooling/neon/quantized.h | 6
-rw-r--r-- src/core/cpu/kernels/softmax/impl/NEON/list.h | 4
-rw-r--r-- src/core/cpu/kernels/sub/neon/list.h | 2
-rw-r--r-- src/graph/backends/NEON/NEDeviceBackend.cpp | 6
-rw-r--r-- src/graph/backends/NEON/NEFunctionFactory.cpp | 8
-rw-r--r-- src/graph/backends/NEON/NENodeValidator.cpp | 4
-rw-r--r-- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp | 2
-rw-r--r-- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 4
-rw-r--r-- src/runtime/cpu/operators/CpuPooling.h | 2
69 files changed, 120 insertions, 123 deletions
diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h
index eea4458170..27878cde36 100644
--- a/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/src/core/CL/cl_kernels/helpers_asymm.h
@@ -192,7 +192,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
/** Each bit of the result is set to the corresponding bit of either then_val or
* else_val depending on whether the corresponding bit of if_mask is set.
- * Equivalent to the VBSL instruction in Arm Neon.
+ * Equivalent to the VBSL instruction in Arm® Neon™.
*
* @param[in] size Size of vector.
*
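[Editor's note] For clarity, a minimal scalar sketch (not part of the patch) of the bit-select this comment describes; VBSL applies the same per-bit rule across every lane of a vector register:

    #include <cstdint>

    // Each result bit comes from then_val where the mask bit is 1,
    // and from else_val where the mask bit is 0.
    uint32_t bit_select(uint32_t if_mask, uint32_t then_val, uint32_t else_val)
    {
        return (if_mask & then_val) | (~if_mask & else_val);
    }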
@@ -320,7 +320,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale)
}
/** Calculates (a+b)/2, rounded to the nearest integer.
- * Equivalent to VRHADD in the Arm Neon instruction set.
+ * Equivalent to VRHADD in the Arm® Neon™ instruction set.
*
* @param[in] size Size of vector.
*
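[Editor's note] A scalar C++ sketch of the rounding behaviour, assuming the widening to int16_t shown here (VRHADD computes the equivalent internally without overflow):

    #include <cstdint>

    // (a + b) / 2 rounded to nearest: the +1 before the shift rounds halves up.
    int8_t rounded_halving_add(int8_t a, int8_t b)
    {
        return static_cast<int8_t>((int16_t(a) + int16_t(b) + 1) >> 1);
    }

For example, rounded_halving_add(3, 4) yields 4, whereas a plain (3 + 4) / 2 truncates to 3.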
diff --git a/src/core/GPUTarget.cpp b/src/core/GPUTarget.cpp
index c1346b1fcc..14264cb883 100644
--- a/src/core/GPUTarget.cpp
+++ b/src/core/GPUTarget.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -146,7 +146,7 @@ GPUTarget get_target_from_name(const std::string &device_name)
if(!found_mali)
{
- ARM_COMPUTE_LOG_INFO_MSG_CORE("Can't find valid Mali GPU. Target is set to default.");
+ ARM_COMPUTE_LOG_INFO_MSG_CORE("Can't find valid Arm® Mali™ GPU. Target is set to default.");
return GPUTarget::MIDGARD;
}
@@ -179,7 +179,7 @@ GPUTarget get_target_from_name(const std::string &device_name)
// Report in case of unknown target
if(gpu_target == GPUTarget::UNKNOWN)
{
- ARM_COMPUTE_LOG_INFO_MSG_CORE("Mali GPU unknown. Target is set to the default one. (BIFROST)");
+ ARM_COMPUTE_LOG_INFO_MSG_CORE("Arm® Mali™ Mali GPU unknown. Target is set to the default one. (BIFROST)");
return GPUTarget::BIFROST;
}
diff --git a/src/core/NEON/INESimpleKernel.h b/src/core/NEON/INESimpleKernel.h
index d2b6de427b..2986e7b5c9 100644
--- a/src/core/NEON/INESimpleKernel.h
+++ b/src/core/NEON/INESimpleKernel.h
@@ -28,7 +28,7 @@
namespace arm_compute
{
-/** Interface for simple Neon kernels having 1 tensor input and 1 tensor output */
+/** Interface for simple CPU kernels having 1 tensor input and 1 tensor output */
using INESimpleKernel = ICPPSimpleKernel;
} // namespace arm_compute
#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index 9d47d7d76f..1691943b07 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -148,7 +148,7 @@ validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const IT
template <typename T, bool fused_activation, typename F>
void NEBatchNormalizationLayerKernel::batch_normalization_nchw(const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
const int window_step_x = 16 / sizeof(T);
@@ -164,7 +164,7 @@ void NEBatchNormalizationLayerKernel::batch_normalization_nchw(const Window &win
F activation_functor(_act_info);
// Hold information about the current feature map we are iterating.
- // Only compute denominator and Neon vectors once per feature map.
+ // Only compute denominator and constants once per feature map.
int slice = -1;
const auto input_mean = reinterpret_cast<const T *>(_mean->ptr_to_element(Coordinates(0, 0)));
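[Editor's note] A hedged sketch of the caching this comment refers to (names hypothetical): the batch-normalization denominator 1 / sqrt(var + epsilon) depends only on the feature map, so it can be recomputed once per slice rather than once per element:

    #include <cmath>

    float cached_denominator = 0.f;
    int   cached_slice       = -1;

    // Recompute the denominator only when the feature map (slice) changes.
    float denominator_for_slice(int slice, const float *var, float epsilon)
    {
        if(slice != cached_slice)
        {
            cached_denominator = 1.f / std::sqrt(var[slice] + epsilon);
            cached_slice       = slice;
        }
        return cached_denominator;
    }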
diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
index c9066578b2..008ad7c9f4 100644
--- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp
@@ -40,7 +40,7 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
{
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW, DataLayout::NHWC);
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
index 26e4455c4a..4ba02f1542 100644
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ b/src/core/NEON/kernels/NECol2ImKernel.cpp
@@ -44,7 +44,7 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
{
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
// Validate configured output
diff --git a/src/core/NEON/kernels/NECol2ImKernel.h b/src/core/NEON/kernels/NECol2ImKernel.h
index 00a519d229..397bf5ab17 100644
--- a/src/core/NEON/kernels/NECol2ImKernel.h
+++ b/src/core/NEON/kernels/NECol2ImKernel.h
@@ -32,7 +32,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform col2im reshaping.
+/** Kernel to perform col2im reshaping.
*
* Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel.
*
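[Editor's note] A simplified scalar sketch of the col2im mapping, under the assumed layout that each input column holds one output pixel across all channels (the real kernel also handles batches and data layouts):

    // Column 'col' of the input matrix holds the values of every channel at
    // spatial position 'col'; col2im scatters them back into per-channel planes.
    void col2im(const float *in, float *out, int num_channels, int width, int height)
    {
        const int plane_size = width * height;
        for(int ch = 0; ch < num_channels; ++ch)
        {
            for(int col = 0; col < plane_size; ++col)
            {
                out[ch * plane_size + col] = in[col * num_channels + ch];
            }
        }
    }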
diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
index 101d1384d0..4cd1bc79fe 100644
--- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp
@@ -69,7 +69,7 @@ Status NEConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input,
DataLayout data_layout)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3));
diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
index 0be1fbe5aa..67d5ca246e 100644
--- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
+++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
@@ -32,7 +32,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Neon kernel to convert asymmetric signed to asymmetric signed and vice-versa */
+/** Kernel to convert asymmetric signed to asymmetric signed and vice-versa */
class NEConvertQuantizedSignednessKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index e81d50fe5f..09f99748bf 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -149,7 +149,7 @@ inline bool run_optim_small_tensor(const ITensor *t)
// Optimized convolver for 1x1 kernels used only where input width and height are both <= 8
// For big Z as in Input=7x7x832, this implementation is faster than the general code because it doesn't need to
-// store intermediate results in memory. Temporary results are stored in Neon registers directly and then written to the output buffer.
+// store intermediate results in memory. Temporary results are stored in SIMD registers directly and then written to the output buffer.
template <unsigned int stridex>
class convolver_w1x1_i8x8_f32
{
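[Editor's note] Why the temporaries fit in registers: with a 1x1 kernel the convolution at each output pixel collapses to a dot product over input channels. A scalar sketch under that assumption (names hypothetical):

    // 1x1 convolution at one output pixel: accumulate across input channels only.
    float convolve_1x1_at(const float *in, const float *weights,
                          int num_channels, int plane_size, int pixel)
    {
        float acc = 0.f;
        for(int ch = 0; ch < num_channels; ++ch)
        {
            acc += in[ch * plane_size + pixel] * weights[ch];
        }
        return acc;
    }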
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 58d385a138..258def77a3 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon interface for Direct Convolution Layer kernel */
+/** Interface for the kernel to perform Direct Convolution Layer. */
class NEDirectConvolutionLayerKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
index 8dcbd00ddc..3597045bd5 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp
@@ -93,7 +93,7 @@ typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value,
output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
index cd0710d0c1..8f7eeb05b2 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
@@ -30,7 +30,7 @@
namespace arm_compute
{
class ITensor;
-/** Neon kernel to accumulate the biases, if provided, or downscale in case of quantized input.
+/** Kernel to accumulate the biases, if provided, or downscale in case of quantized input.
*
* @note We assume bias to be shared
* @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index 70178dffc0..1c7c1f9763 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -103,7 +103,7 @@ void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, Bord
void NEFillBorderKernel::configure(ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_ERROR_ON(tensor->data_type() == DataType::UNKNOWN);
_border_size = border_size;
diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
index 96ed810e0e..9011680c9b 100644
--- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
@@ -47,7 +47,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
index 92fbd12a54..e592d5ef6e 100644
--- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
+++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to interleave the elements of a matrix
+/** Kernel to interleave the elements of a matrix
*
* This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
*
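[Editor's note] A sketch of the 4x4 interleave described above, assuming a row-major matrix with leading dimension lda: the 16 values of one block land on a single output row as a00 a10 a20 a30 a01 a11 a21 a31 ...

    // Interleave one 4x4 block of A (starting at 'a') into 16 contiguous values.
    void interleave_4x4_block(const float *a, int lda, float *out)
    {
        int k = 0;
        for(int col = 0; col < 4; ++col)
        {
            for(int row = 0; row < 4; ++row)
            {
                out[k++] = a[row * lda + col];
            }
        }
    }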
diff --git a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
index dfdb7b3236..acfb79edeb 100644
--- a/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to multiply matrices
+/** Kernel to multiply matrices
*
* @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel
* This kernel performs the following computation:
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
index 9911ffc0f4..f71929fe9e 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
+/** Kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
* and adds to it the offset contribution of matrix A and matrix B in-place.
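[Editor's note] For reference, a hedged scalar sketch of the contribution being added, using the gemmlowp convention real = scale * (q - zero_point); the kernel's exact sign convention depends on how the offsets are passed:

    #include <cstdint>

    // Expanding sum_k (a - a_zero) * (b - b_zero) leaves cross terms that depend
    // only on the row sums of A, the column sums of B, and the depth K.
    int32_t add_offset_contribution(int32_t acc, int32_t a_zero, int32_t b_zero,
                                    int32_t row_sum_a, int32_t col_sum_b, int32_t k)
    {
        return acc - a_zero * col_sum_b - b_zero * row_sum_a + a_zero * b_zero * k;
    }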
diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
index 39fbd8eb0e..6908f37aad 100644
--- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
+/** Kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel.
*
* The computation is performed in-place
*
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h
index 63d80aaf1b..021ff8e2e0 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
+/** Kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
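[Editor's note] The computation list itself is cut off by the diff context. As a hedged sketch, the usual GEMMLowp integer scale-down adds an offset, scales, shifts, and saturates into the QASYMM8 range (rounding details omitted):

    #include <algorithm>
    #include <cstdint>

    // ((acc + offset) * mult) >> shift, clamped to [0, 255].
    uint8_t quantize_down_scale(int32_t acc, int32_t offset, int32_t mult, int32_t shift)
    {
        const int32_t v = ((acc + offset) * mult) >> shift;
        return static_cast<uint8_t>(std::min(255, std::max(0, v)));
    }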
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index 8e92ba6eca..b01b204a6f 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
+/** Kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value.
* The following computations will be performed by the kernel:
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index 9b51a3ba84..9e7dc2f599 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
+/** Kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value.
* The following computations will be performed by the kernel:
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index 4d43afaab2..def0573967 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
+/** Kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
*
* This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
* The following computations will be performed by the kernel:
diff --git a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h
index 521adbfca4..9be618d656 100644
--- a/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h
+++ b/src/core/NEON/kernels/NEGEMMLowpReductionKernel.h
@@ -32,7 +32,7 @@ namespace arm_compute
class ITensor;
struct GEMMLowpReductionKernelInfo;
-/** Common interface for all Neon reduction kernels */
+/** Common interface for all reduction kernels */
class INEGEMMLowpReductionKernel : public INEKernel
{
public:
@@ -69,7 +69,7 @@ protected:
bool _mul_by_scalar;
};
-/** Neon kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
+/** Kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
*
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
@@ -130,7 +130,7 @@ private:
void run_internal(const Window &window);
};
-/** Neon kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
+/** Kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
*
* @note This stage is needed to handle the offset of matrix product
* https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
index f9ff143e07..c896cabc6a 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
+/** Kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
*
* @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size
*
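[Editor's note] A scalar sketch of the in-place weighted addition: the output buffer already holds MTX_0 and accumulates beta * MTX_1:

    void matrix_addition(float *out, const float *mtx_1, int num_elements, float beta)
    {
        for(int i = 0; i < num_elements; ++i)
        {
            out[i] += beta * mtx_1[i]; // MTX_OUT = MTX_0 + beta * MTX_1
        }
    }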
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
index e2945ee117..3bc162a1b4 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
+/** Kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication
*
 * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel
* @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
index ad7ae505f4..f6a453cbbc 100644
--- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp
@@ -51,7 +51,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
if(output->total_size() != 0)
{
diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
index 583588a1c1..7ca71cf414 100644
--- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
+++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h
@@ -31,7 +31,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Neon kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
+/** Kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
*
 * The following is an example of how the transposition 1xW works when the input data is F32
*
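[Editor's note] For F32, W = 16 / sizeof(float) = 4. A sketch of the chunked transpose, assuming N is a multiple of 4: chunk c of input row r lands on output row c, offset by 4 * r:

    // Transpose an M x N float matrix in 1x4 chunks.
    void transpose_1x4(const float *in, float *out, int m, int n)
    {
        for(int r = 0; r < m; ++r)
        {
            for(int c = 0; c < n / 4; ++c)
            {
                for(int e = 0; e < 4; ++e)
                {
                    out[c * (4 * m) + 4 * r + e] = in[r * n + 4 * c + e];
                }
            }
        }
    }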
diff --git a/src/core/NEON/kernels/NEGatherKernel.h b/src/core/NEON/kernels/NEGatherKernel.h
index 46b41b28e3..0711f8190b 100644
--- a/src/core/NEON/kernels/NEGatherKernel.h
+++ b/src/core/NEON/kernels/NEGatherKernel.h
@@ -33,7 +33,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Kernel to perform other operation on Neon */
+/** Kernel to perform gather operation. */
class NEGatherKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
index fa1f7a6c49..d33431a8d2 100644
--- a/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp
@@ -82,7 +82,7 @@ inline float16x8_t vector_float_norm(const float16x8_t &inputs, const float32x4_
template <typename T, typename AccType = T>
void instance_normalization_nchw(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
// Clear X/Y dimensions on execution window as we handle the planes manually
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index 21a70ae513..f1c5d3f6e6 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -170,7 +170,7 @@ void NENormalizationLayerKernel::configure(const ITensor *input, const ITensor *
template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm>
void NENormalizationLayerKernel::normalize_float(const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
Window win(window);
diff --git a/src/core/NEON/kernels/NEPadLayerKernel.h b/src/core/NEON/kernels/NEPadLayerKernel.h
index af0dbfdc64..00cda7dc22 100644
--- a/src/core/NEON/kernels/NEPadLayerKernel.h
+++ b/src/core/NEON/kernels/NEPadLayerKernel.h
@@ -30,10 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to add padding to a tensor
- *
- * Add padding given padding information
- */
+/** Basic kernel to pad the input tensor given padding information. */
class NEPadLayerKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h
index 5522ae889a..a3ff6e988f 100644
--- a/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h
+++ b/src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h
@@ -31,7 +31,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform layer normalization */
+/** Kernel to perform layer normalization for QLSTM. */
class NEQLSTMLayerNormalizationKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp
index 170f58fd7a..d0c51f8497 100644
--- a/src/core/NEON/kernels/NERangeKernel.cpp
+++ b/src/core/NEON/kernels/NERangeKernel.cpp
@@ -43,7 +43,7 @@ namespace
template <typename T>
void range_function(ITensor *output, float start, float step, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>::tag_type;
const auto step_vec = wrapper::vdup_n(static_cast<T>(step), ExactTagType{});
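[Editor's note] The scalar computation the vector code implements is simply out[i] = start + i * step; step_vec broadcasts the step so several lanes can advance at once. A minimal sketch:

    void range_fill(float *out, int num_elements, float start, float step)
    {
        for(int i = 0; i < num_elements; ++i)
        {
            out[i] = start + static_cast<float>(i) * step;
        }
    }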
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 875d23333e..553048c7dd 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -386,7 +386,7 @@ public:
template <typename T, int S>
struct RedOpX
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
inline void operator()(const Window &in_window, Window &out_window, const ITensor *in, ITensor *out, const ReductionOperation op)
@@ -908,7 +908,7 @@ struct RedOpX_quantized
template <typename T, int S>
struct RedOpYZW
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
using neon_vector = typename wrapper::traits::neon_vector<T, S>::type;
@@ -1127,7 +1127,7 @@ struct RedOpYZW
template <typename T, int S, int axis, ReductionOperation op>
struct RedOpYZW_complex
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
using neon_vector = typename wrapper::traits::neon_vector<T, S>::type;
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.h b/src/core/NEON/kernels/NEReductionOperationKernel.h
index c7ed0070be..667305b3aa 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform a reduction operation
+/** Kernel to perform a reduction operation
*
* @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized
* output tensor is signed 32-bit integer (S32). It is the user's responsibility
diff --git a/src/core/NEON/kernels/NERemapKernel.h b/src/core/NEON/kernels/NERemapKernel.h
index adc7f4bdd5..21cb67ef58 100644
--- a/src/core/NEON/kernels/NERemapKernel.h
+++ b/src/core/NEON/kernels/NERemapKernel.h
@@ -31,7 +31,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform a remap on a tensor */
+/** Kernel to perform a remap on a tensor */
class NERemapKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp
index 75a58fccd6..a7b830c066 100644
--- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp
@@ -42,7 +42,7 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t stride)
{
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index b2fce0f56d..758433f89f 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -37,7 +37,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, axis);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(axis, 1, DataType::U32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->num_dimensions() > 1, "Axis must be a 1D tensor");
diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp
index 64310e7f7f..07ce829c43 100644
--- a/src/core/NEON/kernels/NEStackLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp
@@ -44,7 +44,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(idx_input >= num_tensors);
ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions());
diff --git a/src/core/NEON/kernels/NEStackLayerKernel.h b/src/core/NEON/kernels/NEStackLayerKernel.h
index dc5c7d77ad..9b36518e4d 100644
--- a/src/core/NEON/kernels/NEStackLayerKernel.h
+++ b/src/core/NEON/kernels/NEStackLayerKernel.h
@@ -32,7 +32,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/
+/** Basic kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */
class NEStackLayerKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NETileKernel.h b/src/core/NEON/kernels/NETileKernel.h
index e6ce9534e7..47f306afd0 100644
--- a/src/core/NEON/kernels/NETileKernel.h
+++ b/src/core/NEON/kernels/NETileKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform a tile operation */
+/** Basic kernel to perform a tile operation */
class NETileKernel : public INEKernel
{
public:
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index 44d60093f0..9bef9c30d9 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -47,7 +47,7 @@ TensorShape get_output_shape(const ITensorInfo *input, bool has_bias)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
if(biases != nullptr)
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.h b/src/core/NEON/kernels/NEWeightsReshapeKernel.h
index a4a9e28763..76eca9fe86 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
{
class ITensor;
-/** Neon kernel to perform reshaping on the weights used by convolution and locally connected layer
+/** Kernel to perform reshaping on the weights used by convolution and locally connected layer
*
* Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
 * In combination with @ref NEIm2ColKernel, it can transform a convolution into a matrix multiplication.
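[Editor's note] A hedged sketch of the reshape: each [C x Kh x Kw] kernel is flattened to one matrix row, with the bias (if present) appended, so the convolution becomes a GEMM against the im2col output:

    // Flatten num_kernels kernels of kernel_elems values each into matrix rows.
    void reshape_weights(const float *w, const float *bias, float *out,
                         int num_kernels, int kernel_elems)
    {
        const int row_len = kernel_elems + (bias != nullptr ? 1 : 0);
        for(int k = 0; k < num_kernels; ++k)
        {
            for(int e = 0; e < kernel_elems; ++e)
            {
                out[k * row_len + e] = w[k * kernel_elems + e];
            }
            if(bias != nullptr)
            {
                out[k * row_len + kernel_elems] = bias[k];
            }
        }
    }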
diff --git a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
index 3583735482..75d257de4b 100644
--- a/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
+++ b/src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
@@ -35,7 +35,7 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Interface for the Neon kernel to perform Winograd input transform. */
+/** Interface for the kernel to perform Winograd input transform. */
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
@@ -96,7 +96,7 @@ public:
}
};
-/** Neon kernel to perform Winograd input transform. */
+/** Kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel
{
@@ -227,7 +227,7 @@ private:
ITensor *_workspace;
};
-/** Interface for the Neon kernel to perform Winograd output transform. */
+/** Interface for the kernel to perform Winograd output transform. */
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
@@ -310,7 +310,7 @@ public:
}
};
-/** Neon kernel to perform Winograd output transform. */
+/** Kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel
{
@@ -437,7 +437,7 @@ private:
int _num_channels;
};
-/** Interface for the Neon kernel to perform Winograd weights transform. */
+/** Interface for the kernel to perform Winograd weights transform. */
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
@@ -495,7 +495,7 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};
-/** Neon kernel to perform Winograd weights transform. */
+/** Kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel
{
@@ -577,7 +577,7 @@ private:
int _num_input_channels;
};
-/** Neon kernel to perform Winograd. */
+/** Kernel to perform Winograd. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
index 5c894c01c8..6c3743dce7 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
@@ -111,7 +111,7 @@ static const GemmImplementation<float, float> gemm_fp32_methods[] =
},
#endif // __ARM_FEATURE_SVE
-// Neon hybrid methods
+// Arm® Neon™ hybrid methods
{
GemmMethod::GEMM_HYBRID,
"a64_smallK_hybrid_fp32_mla_8x4",
diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
index bb86d9e41d..0d56b46e19 100644
--- a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
+++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp
@@ -292,21 +292,21 @@ void Interleave(TOut *out, const TIn *in, size_t in_stride, const unsigned int y
/* AArch32 */
#ifdef __arm__
/* FP32 */
-/* Neon implementation (height 6) */
+/* Arm® Neon™ implementation (height 6) */
template void IndirectInterleave<6, 1, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<6, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<6, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* FP16 */
#if __ARM_FP16_ARGS
-/* Neon implementation using FP32 kernel (height 6) */
+/* Arm® Neon™ implementation using FP32 kernel (height 6) */
template void IndirectInterleave<6, 1, VLType::None>(float *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<6, 1, VLType::None>(float *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<6, 1, VLType::None>(float *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif /* __ARM_FP16_ARGS */
/* BF16 */
-/* Neon implementation using FP32 kernel */
+/* Arm® Neon™ implementation using FP32 kernel */
template void IndirectInterleave<6, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<6, 1, VLType::None>(float *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<6, 1, VLType::None>(float *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -315,7 +315,7 @@ template void Interleave<6, 1, VLType::None>(float *, const bfloat16 *, size_t,
/* AArch64 */
#ifdef __aarch64__
/* FP32 */
-/* Neon/SVE implementation (height 8) */
+/* Arm® Neon™/SVE implementation (height 8) */
template void IndirectInterleave<8, 1, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -339,7 +339,7 @@ template void ConvolutionInterleave<8, 1, VLType::None>(float *, const __fp16 *,
template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* BF16 */
-/* Neon/SVE BFDOT */
+/* Arm® Neon™/SVE BFDOT */
#ifdef V8P6_BF
template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -350,7 +350,7 @@ template void ConvolutionInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat
template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif // V8P6_BF
-/* Neon/SVE using FP32 kernel */
+/* Arm® Neon™/SVE using FP32 kernel */
template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(float *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(float *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -365,12 +365,12 @@ template void ConvolutionInterleave<8, 1, VLType::None>(uint16_t *, const uint16
template void Interleave<8, 1, VLType::None>(uint16_t *, const uint16_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
/* INT8 */
-/* Neon SMLA/SMLAL (height 4, block 16) */
+/* Arm® Neon™ SMLA/SMLAL (height 4, block 16) */
template void IndirectInterleave<4, 16, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<4, 16, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<4, 16, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-/* Neon SDOT (height 8, block 4) */
+/* Arm® Neon™ SDOT (height 8, block 4) */
template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -382,17 +382,17 @@ template void ConvolutionInterleave<8, 8, VLType::None>(int8_t *, const int8_t *
template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif // MMLA_INT8
-/* Neon SDOT (height 8, block 1) */
+/* Arm® Neon™ SDOT (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(int16_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(int16_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-/* Neon SMLA/SMLAL (height 4, block 16) */
+/* Arm® Neon™ SMLA/SMLAL (height 4, block 16) */
template void IndirectInterleave<4, 16, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void ConvolutionInterleave<4, 16, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<4, 16, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
-/* Neon SDOT (height 8, block 4) */
+/* Arm® Neon™ SDOT (height 8, block 4) */
template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
@@ -404,7 +404,7 @@ template void ConvolutionInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t
template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
#endif // MMLA_INT8
-/* Neon 16-bit (height 8, block 1) */
+/* Arm® Neon™ 16-bit (height 8, block 1) */
template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t);
template void ConvolutionInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
template void Interleave<8, 1, VLType::None>(uint16_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t);
diff --git a/src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp b/src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp
index c105adac70..ed5254a0a4 100644
--- a/src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp
+++ b/src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp
@@ -44,7 +44,7 @@ template <typename T>
void batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma,
float epsilon, ActivationLayerInfo &act_info, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<float16_t, wrapper::traits::BitWidth::W128>;
const int window_step_x = 8;
diff --git a/src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp b/src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp
index 4a90a211c7..d6e22e1843 100644
--- a/src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp
+++ b/src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp
@@ -43,7 +43,7 @@ template <typename T>
void batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma,
float epsilon, ActivationLayerInfo &act_info, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<float, wrapper::traits::BitWidth::W128>;
const int window_step_x = 4;
diff --git a/src/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/src/core/NEON/kernels/detail/NEActivationFunctionDetail.h
index e68f1117e8..25d682d8ae 100644
--- a/src/core/NEON/kernels/detail/NEActivationFunctionDetail.h
+++ b/src/core/NEON/kernels/detail/NEActivationFunctionDetail.h
@@ -34,7 +34,7 @@ namespace detail
template <typename T, int S>
struct dummy
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
/** Construct a dummy activation object.
@@ -68,9 +68,9 @@ struct dummy
template <typename T, int S>
struct linear
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a Linear activation object.
@@ -112,9 +112,9 @@ struct linear
template <typename T, int S>
struct square
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a Square activation object.
@@ -148,9 +148,9 @@ struct square
template <typename T, int S>
struct logistic
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a Logistic activation object.
@@ -188,9 +188,9 @@ struct logistic
template <typename T, int S>
struct relu
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a RELU activation object.
@@ -228,9 +228,9 @@ struct relu
template <typename T, int S>
struct brelu
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a bounded RELU activation object.
@@ -270,9 +270,9 @@ struct brelu
template <typename T, int S>
struct lubrelu
{
- /** Neon vector type. */
+ /** SIMD vector type. */
using ExactType = typename wrapper::traits::neon_vector<T, S>::type;
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
/** Construct a lower-upper bounded RELU activation object.
diff --git a/src/core/NEON/wrapper/traits.h b/src/core/NEON/wrapper/traits.h
index b786e44bc7..3452b76761 100644
--- a/src/core/NEON/wrapper/traits.h
+++ b/src/core/NEON/wrapper/traits.h
@@ -44,7 +44,7 @@ struct vector_64_tag {};
/** 128-bit vector tag */
struct vector_128_tag {};
-/** Create the appropriate Neon vector given its type and size in terms of elements */
+/** Create the appropriate SIMD vector given its type and size in terms of elements */
template <typename T, int S> struct neon_vector;
// Specializations
@@ -88,7 +88,7 @@ enum class BitWidth
W128, /**< 128-bit width */
};
-/** Create the appropriate Neon vector given its type and size in terms of bits */
+/** Create the appropriate SIMD vector given its type and size in terms of bits */
template <typename T, BitWidth BW> struct neon_bitvector;
// Specializations
#ifndef DOXYGEN_SKIP_THIS
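[Editor's note] The trait pattern being documented, as a small self-contained sketch (simd_vector is a hypothetical name; the real wrapper uses neon_vector / neon_bitvector): a primary template is declared but only specializations define the mapping from (element type, width) to a concrete vector type and its tag:

    #include <arm_neon.h>

    struct vector_128_tag {}; // tag dispatch type, as in the wrapper above

    template <typename T, int BitWidth>
    struct simd_vector; // primary template intentionally left undefined

    template <>
    struct simd_vector<float, 128> // one specialization: four F32 lanes
    {
        using type     = float32x4_t;
        using tag_type = vector_128_tag;
    };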
diff --git a/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp b/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
index bd6d777572..aaa1898ce9 100644
--- a/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
@@ -133,7 +133,7 @@ void batch_concat(const ITensor *src, ITensor *dst, unsigned int batch_offset, c
Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
diff --git a/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp b/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
index d8eed44cd8..35c189caeb 100644
--- a/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
@@ -134,7 +134,7 @@ void depth_concat(const ITensor *src, ITensor *dst, unsigned int depth_offset, c
Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
diff --git a/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp b/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
index 4dc458a4a8..363c271a68 100644
--- a/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
@@ -49,7 +49,7 @@ namespace
Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
diff --git a/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp b/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
index efefd5d011..11b1db5bc2 100644
--- a/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
@@ -49,7 +49,7 @@ namespace
Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0));
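All four concatenate kernels share this early-return validation shape: each ARM_COMPUTE_RETURN_ERROR_ON* macro checks one precondition and returns a failing Status on the spot. A hand-rolled equivalent of the control flow (a sketch, not the library's actual macro expansion):

    Status validate_sketch(const ITensorInfo *src, const ITensorInfo *dst)
    {
        if(src == nullptr || dst == nullptr)
            return Status(ErrorCode::RUNTIME_ERROR, "null tensor info");  // ..._ON_NULLPTR
        if(src->data_type() == DataType::UNKNOWN)
            return Status(ErrorCode::RUNTIME_ERROR, "unknown data type"); // ..._ERROR_ON
        if(src->data_type() != dst->data_type())
            return Status(ErrorCode::RUNTIME_ERROR, "type mismatch");     // ..._MISMATCHING_DATA_TYPES
        return Status{}; // a default-constructed Status reports success
    }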
diff --git a/src/core/cpu/kernels/CpuReshapeKernel.cpp b/src/core/cpu/kernels/CpuReshapeKernel.cpp
index 4ab1612518..70c652695a 100644
--- a/src/core/cpu/kernels/CpuReshapeKernel.cpp
+++ b/src/core/cpu/kernels/CpuReshapeKernel.cpp
@@ -50,7 +50,7 @@ namespace
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
diff --git a/src/core/cpu/kernels/CpuTransposeKernel.cpp b/src/core/cpu/kernels/CpuTransposeKernel.cpp
index ed08aa1aa0..c7cafe94a8 100644
--- a/src/core/cpu/kernels/CpuTransposeKernel.cpp
+++ b/src/core/cpu/kernels/CpuTransposeKernel.cpp
@@ -95,7 +95,7 @@ void transpose_8bit_elements(const ITensor *in, ITensor *out, const Window &wind
Iterator output(out, window_out);
- // Run the Neon path if and only if the input is not a row-vector
+ // Run the SIMD path if and only if the input is not a row-vector
if(in->info()->dimension(1) != 1)
{
Iterator input(in, window_in);
@@ -234,7 +234,7 @@ void transpose_16bit_elements(const ITensor *in, ITensor *out, const Window &win
Iterator output(out, window_out);
- // Run the Neon path if and only if the input is not a row-vector
+ // Run the SIMD path if and only if the input is not a row-vector
if(in->info()->dimension(1) != 1)
{
Iterator input(in, window_in);
@@ -347,7 +347,7 @@ void transpose_32bit_elements(const ITensor *in, ITensor *out, const Window &win
Iterator output(out, window_out);
- // Run the Neon path if and only if the input is not a row-vector
+ // Run the SIMD path if and only if the input is not a row-vector
if(in->info()->dimension(1) != 1)
{
Iterator input(in, window_in);
@@ -455,7 +455,7 @@ void CpuTransposeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
Status CpuTransposeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
- //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use Neon FP16 instructions.
+ //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
// Error if input is not 8 bit, 16bit or 32bit
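The row-vector condition above exists because transposing a 1-row tensor degenerates to a strided copy, so the vectorised block transpose is bypassed. A simplified scalar view of the two paths (a sketch over plain pointers, not the kernel's Iterator-based code):

    if(height != 1)
    {
        // SIMD path: transpose fixed-size blocks with vector loads/stores.
    }
    else
    {
        // Row-vector path: each input element becomes one element of the
        // single output column.
        for(size_t x = 0; x < width; ++x)
            dst[x * dst_stride_in_elements] = src[x];
    }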
diff --git a/src/core/cpu/kernels/activation/NEON/fp16.cpp b/src/core/cpu/kernels/activation/NEON/fp16.cpp
index 0ddd43ea0e..6f2d5d8533 100644
--- a/src/core/cpu/kernels/activation/NEON/fp16.cpp
+++ b/src/core/cpu/kernels/activation/NEON/fp16.cpp
@@ -50,7 +50,7 @@ inline float16x8_t mask_float_vector(const float16x8_t &in, const uint16x8_t &ma
void fp16_neon_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<float16_t, wrapper::traits::BitWidth::W128>;
const ActivationLayerInfo::ActivationFunction act = act_info.activation();
diff --git a/src/core/cpu/kernels/activation/NEON/fp32.cpp b/src/core/cpu/kernels/activation/NEON/fp32.cpp
index 244ca5739f..54301d45ad 100644
--- a/src/core/cpu/kernels/activation/NEON/fp32.cpp
+++ b/src/core/cpu/kernels/activation/NEON/fp32.cpp
@@ -48,7 +48,7 @@ inline float32x4_t mask_float_vector(const float32x4_t &in, const uint32x4_t &ma
void fp32_neon_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename arm_compute::wrapper::traits::neon_bitvector_tag_t<float, wrapper::traits::BitWidth::W128>;
constexpr int window_step_x = 4;
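The fp16 and fp32 kernels differ only in lane count, which is why they key the tag off a bit width rather than an element count; at a fixed 128-bit register width that yields 8 half-precision or 4 single-precision lanes. A sketch of the fp32 case:

    using TagF32 = wrapper::traits::neon_bitvector_tag_t<float, wrapper::traits::BitWidth::W128>;
    auto vone = wrapper::vdup_n(1.f, TagF32{}); // float32x4_t: 4 lanes of a 128-bit register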
diff --git a/src/core/cpu/kernels/add/neon/list.h b/src/core/cpu/kernels/add/neon/list.h
index 964bdccca3..3ab03dd40e 100644
--- a/src/core/cpu/kernels/add/neon/list.h
+++ b/src/core/cpu/kernels/add/neon/list.h
@@ -47,7 +47,7 @@ DECLARE_ADD_KERNEL(add_u8_u8_s16_neon);
template <typename ScalarType>
void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<ScalarType, wrapper::traits::BitWidth::W128>;
// Create input windows
diff --git a/src/core/cpu/kernels/pooling/neon/quantized.h b/src/core/cpu/kernels/pooling/neon/quantized.h
index 535fb53d87..a16960a205 100644
--- a/src/core/cpu/kernels/pooling/neon/quantized.h
+++ b/src/core/cpu/kernels/pooling/neon/quantized.h
@@ -473,7 +473,7 @@ void pooling2_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *ds
Iterator in(src, window_src);
Iterator out(dst0, window);
- /** Neon vector types */
+ /** SIMD vector types */
using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
using q8x8x2_t = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
@@ -602,7 +602,7 @@ void pooling3_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *ds
Iterator in(src, window_src);
Iterator out(dst0, window);
- /** Neon vector types */
+ /** SIMD vector types */
using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
using q8x16_t = typename wrapper::traits::neon_vector<T, 16>::type;
using q8x8x2_t = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type;
@@ -756,7 +756,7 @@ void poolingMxN_quantized_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *
Iterator in(src, window_src);
Iterator out(dst0, window);
- /** Neon vector types */
+ /** SIMD vector types */
using q8x8_t = typename wrapper::traits::neon_vector<T, 8>::type;
using q16_t = typename wrapper::traits::promote_t<T>;
using q16x8_t = typename wrapper::traits::neon_vector<q16_t, 8>::type;
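For orientation, these aliases derive every vector type in the quantized pooling kernels from the single element type T, with promote_t supplying a widened accumulator type. What they resolve to for T = uint8_t, assuming the usual trait specializations:

    using T        = uint8_t;
    using q8x8_t   = wrapper::traits::neon_vector<T, 8>::type;     // uint8x8_t
    using q8x16_t  = wrapper::traits::neon_vector<T, 16>::type;    // uint8x16_t
    using q16_t    = wrapper::traits::promote_t<T>;                // uint16_t (widened)
    using q16x8_t  = wrapper::traits::neon_vector<q16_t, 8>::type; // uint16x8_t
    // std::conditional keeps the paired type's signedness in step with T:
    using q8x8x2_t = std::conditional<std::is_same<T, uint8_t>::value,
                                      uint8x8x2_t, int8x8x2_t>::type;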
diff --git a/src/core/cpu/kernels/softmax/impl/NEON/list.h b/src/core/cpu/kernels/softmax/impl/NEON/list.h
index 740e6ea9bc..5ebee31272 100644
--- a/src/core/cpu/kernels/softmax/impl/NEON/list.h
+++ b/src/core/cpu/kernels/softmax/impl/NEON/list.h
@@ -36,7 +36,7 @@ namespace cpu
template <typename T>
void neon_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
constexpr int window_step_x = 16 / sizeof(T);
@@ -267,7 +267,7 @@ void neon_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *c
Iterator max_it(max, window);
Iterator out_it(out, window);
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
constexpr int vec_size = 16 / sizeof(T);
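As background, neon_logits_1d_max reduces each row to its maximum before the exponentials are taken, which keeps the softmax numerically stable. A scalar equivalent of that first pass (a sketch; the kernel itself keeps a vector of running maxima via wrapper::vmax and reduces it at the end):

    float max_val = -std::numeric_limits<float>::infinity();
    for(int x = 0; x < row_len; ++x)
        max_val = std::max(max_val, in_row[x]);
    // The softmax pass then computes exp(v - max_val) per element, so the
    // largest exponent is exactly 0.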
diff --git a/src/core/cpu/kernels/sub/neon/list.h b/src/core/cpu/kernels/sub/neon/list.h
index 8c82402513..1ab4e6367b 100644
--- a/src/core/cpu/kernels/sub/neon/list.h
+++ b/src/core/cpu/kernels/sub/neon/list.h
@@ -47,7 +47,7 @@ DECLARE_SUB_KERNEL(sub_u8_u8_s16_neon);
template <typename T>
void sub_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
{
- /** Neon vector tag type. */
+ /** SIMD vector tag type. */
using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
bool is_sat = policy == ConvertPolicy::SATURATE;
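is_sat feeds the per-element choice between wrapping and saturating arithmetic; with the wrapper intrinsics that is the difference between vsub and vqsub. A one-line sketch of the inner-loop selection:

    auto res = is_sat ? wrapper::vqsub(va, vb)  // clamps at the type's limits
                      : wrapper::vsub(va, vb);  // wraps modulo 2^bits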
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index a6c4fe9aa3..9efa3ac0c8 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -52,7 +52,7 @@ namespace graph
{
namespace backends
{
-/** Register Neon backend */
+/** Register CPU backend */
static detail::BackendRegistrar<NEDeviceBackend> NEDeviceBackend_registrar(Target::NEON);
NEDeviceBackend::NEDeviceBackend()
@@ -138,7 +138,7 @@ std::unique_ptr<ITensorHandle> NEDeviceBackend::create_subtensor(ITensorHandle *
std::unique_ptr<arm_compute::IFunction> NEDeviceBackend::configure_node(INode &node, GraphContext &ctx)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring Neon node with ID : " << node.id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Configuring CPU node with ID : " << node.id() << std::endl);
ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::NEON);
// Configure node
@@ -147,7 +147,7 @@ std::unique_ptr<arm_compute::IFunction> NEDeviceBackend::configure_node(INode &n
arm_compute::Status NEDeviceBackend::validate_node(INode &node)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating Neon node with ID : " << node.id() << std::endl);
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating CPU node with ID : " << node.id() << std::endl);
ARM_COMPUTE_ERROR_ON(node.assigned_target() != Target::NEON);
return NENodeValidator::validate(&node);
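The registrar in the first hunk above is the classic static self-registration idiom: constructing a namespace-scope object before main() inserts the backend into a process-wide registry keyed by Target::NEON. A hand-written sketch of the idiom (the library's real BackendRegistrar/BackendRegistry API may differ):

    template <typename BackendType>
    struct RegistrarSketch
    {
        explicit RegistrarSketch(Target target)
        {
            registry()[target] = std::make_unique<BackendType>(); // runs at static init
        }
        static std::map<Target, std::unique_ptr<IDeviceBackend>> &registry()
        {
            static std::map<Target, std::unique_ptr<IDeviceBackend>> r; // process-wide
            return r;
        }
    };
    static RegistrarSketch<NEDeviceBackend> registrar_sketch{ Target::NEON };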
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 6cf5874633..0fc5291648 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -54,7 +54,7 @@ struct NETargetInfo
Target NETargetInfo::TargetType = Target::NEON;
-/** Collection of Neon convolution functions */
+/** Collection of CPU convolution functions */
struct NEConvolutionLayerFunctions
{
using GenericConvolutionLayer = NEConvolutionLayer;
@@ -63,7 +63,7 @@ struct NEConvolutionLayerFunctions
using WinogradConvolutionLayer = NEWinogradConvolutionLayer;
};
-/** Collection of Neon element-wise functions */
+/** Collection of CPU element-wise functions */
struct NEEltwiseFunctions
{
using Addition = NEArithmeticAddition;
@@ -73,13 +73,13 @@ struct NEEltwiseFunctions
using Division = NEElementwiseDivision;
};
-/** Collection of Neon unary element-wise functions */
+/** Collection of CPU unary element-wise functions */
struct NEUnaryEltwiseFunctions
{
using Exp = NEExpLayer;
};
-/** Function and tensor types to be used inside a Neon fused convolution/batch normalization layer */
+/** Function and tensor types to be used inside a fused convolution/batch normalization layer */
struct NEFusedLayerTypes
{
using ConvolutionLayer = NEConvolutionLayer;
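These "collection" structs are type bundles: the factory's templated helpers take one as a template parameter and pull the concrete class out of it, so a single generic body serves any backend. A sketch with a hypothetical make_addition helper (not part of this patch):

    template <typename EltwiseFunctions>
    std::unique_ptr<IFunction> make_addition()
    {
        // The alias resolves to the backend's class; NEArithmeticAddition here.
        return std::make_unique<typename EltwiseFunctions::Addition>();
    }
    // Usage: auto f = make_addition<NEEltwiseFunctions>(); then configure it.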
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index 8453cfa03a..d6e372004b 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -50,7 +50,7 @@ namespace graph
{
namespace backends
{
-/** Collection of Neon element-wise functions */
+/** Collection of CPU element-wise functions */
struct NEEltwiseLayerFunctions
{
using ArithmeticAddition = NEArithmeticAddition;
@@ -60,7 +60,7 @@ struct NEEltwiseLayerFunctions
using ArithmeticDivision = NEElementwiseDivision;
};
-/** Collection of Neon unary element-wise functions */
+/** Collection of CPU unary element-wise functions */
struct NEUnaryEltwiseLayerFunctions
{
using ExpLayer = NEExpLayer;
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index e72a6c3226..7fb1d583ff 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -285,7 +285,7 @@ private:
/** Assembly Gemm kernel */
std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
- /** Optimised Neon kernel */
+ /** Optimised Arm® Neon™ kernel */
std::unique_ptr<INEKernel> _optimised_kernel{ nullptr };
/** Input A */
const ITensor *_a{ nullptr };
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 59747a82f9..900330e4a6 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -431,7 +431,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported");
const DataLayout data_layout = input->data_layout();
const DataType data_type = input->data_type();
@@ -523,7 +523,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
if(!skip_im2col)
{
// Create tensor info for im2col reshaped inputs
- // For Neon the batch size is on the fourth dimension
+ // For CPU, the batch size is on the fourth dimension
// TODO (giaiod01): Auto-initialize the output shape of im2col COMPMID-1482
TensorShape shape_im2col = input->tensor_shape();
shape_im2col.set(0, mat_weights_rows);
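To make the dimension comment concrete: keeping the batch on dimension 3 means im2col only rewrites the first three dimensions of the input shape. A sketch of the resulting shape for an input [W, H, C, N], using hypothetical variable names:

    TensorShape shape_sketch = input_shape;              // [W, H, C, N]
    shape_sketch.set(0, kernel_w * kernel_h * channels); // patch elements per column
    shape_sketch.set(1, conv_w * conv_h);                // one column per output position
    shape_sketch.set(2, 1);
    // Dimension 3 (the batch, N) is left untouched.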
diff --git a/src/runtime/cpu/operators/CpuPooling.h b/src/runtime/cpu/operators/CpuPooling.h
index 9ebcd5f6aa..b1647ea689 100644
--- a/src/runtime/cpu/operators/CpuPooling.h
+++ b/src/runtime/cpu/operators/CpuPooling.h
@@ -40,7 +40,7 @@ namespace cpu
{
// Forward Declarations
class CpuPoolingAssemblyDispatch;
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following Neon kernels:
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels:
*
* -# @ref NEFillBorderKernel (executed if padding size is different from zero)
* -# @ref kernels::CpuPoolingKernel