diff options
Diffstat (limited to 'arm_compute')
7 files changed, 116 insertions, 73 deletions
diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h new file mode 100644 index 0000000000..1799f9003e --- /dev/null +++ b/arm_compute/core/CPP/Validate.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPP_VALIDATE_H__ +#define __ARM_COMPUTE_CPP_VALIDATE_H__ + +#include "arm_compute/core/Validate.h" + +namespace arm_compute +{ +/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor_info Tensor info to validate. 
+ * + * @return Status + */ +inline arm_compute::Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, + const ITensorInfo *tensor_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); +#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16, + function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + return arm_compute::Status {}; +} + +/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in. + * + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] tensor Tensor to validate. + * + * @return Status + */ +inline arm_compute::Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, + const ITensor *tensor) +{ + ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); + return arm_compute::Status{}; +} + +#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) + +#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CPP_VALIDATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 523f1d33a1..36c9587969 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -85,7 +85,7 @@ 
private: NEGEMMInterleave4x4Kernel _interleave_kernel; NEGEMMTranspose1xWKernel _transpose_kernel; NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMAssemblyDispatchF32 _asm_glue; + NEGEMMAssemblyDispatch _asm_glue; NEGEMMMatrixAdditionKernel _ma_kernel; Tensor _tmp_a; Tensor _tmp_b; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h index 1c9ecb088e..382ef1caba 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -35,7 +35,6 @@ namespace arm_compute { /** Assembly kernel glue */ -template <typename TypeInput, typename TypeOutput> class NEGEMMAssemblyDispatch : public IFunction { public: @@ -43,12 +42,21 @@ public: NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Prevent instances of this class from being copy constructed */ - NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch<TypeInput, TypeOutput> &) = delete; + NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete; /** Prevent instances of this class from being copied */ - NEGEMMAssemblyDispatch<TypeInput, TypeOutput> &operator=(const NEGEMMAssemblyDispatch<TypeInput, TypeOutput> &) = delete; - NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch<TypeInput, TypeOutput> &&) = default; - NEGEMMAssemblyDispatch<TypeInput, TypeOutput> &operator=(NEGEMMAssemblyDispatch<TypeInput, TypeOutput> &&) = default; - ~NEGEMMAssemblyDispatch() = default; + NEGEMMAssemblyDispatch &operator=(const NEGEMMAssemblyDispatch &) = delete; + NEGEMMAssemblyDispatch(NEGEMMAssemblyDispatch &&) = default; + NEGEMMAssemblyDispatch &operator=(NEGEMMAssemblyDispatch &&) = default; + ~NEGEMMAssemblyDispatch() = default; + + class IFallback + { + public: + virtual void run() = 0; + virtual void prepare() = 0; + virtual bool is_configured() const = 0; + virtual ~IFallback() = default; + }; private: /** ACL Function */ @@ -68,53 +76,9 @@ 
private: */ bool create_function(arm_gemm::GemmMethod method, const ITensor *a, const ITensor *b, ITensor *d, float alpha, float beta, bool pretranspose_hint); - //Fallback: use arm_gemm's AssemblyGemm: - class Fallback - { -#ifndef DOXYGEN_SKIP_THIS - public: - /** Configures the arrays pointers and strides in the assembly kernel and executes the assembly kernel. - * The call to set_arrays is needed to deal with the input sizes containing batches (dims > 2) - */ - void run(); - void configure(const ITensor *a, const ITensor *b, ITensor *d, arm_gemm::GemmArgs<TypeOutput> &args, MemoryGroup &memory_group); - void prepare(); - bool is_configured() const; -#endif /* DOXYGEN_SKIP_THIS */ - - private: - /** Allocate a workspace tensor. - * - * @param[in] workspace_size Size to allocate. - * @param[in] memory_group Tensor memory group. - * @param[in] alignment Workspace memory alignment. - */ - void allocate_workspace(size_t workspace_size, MemoryGroup *memory_group, size_t alignment); - - /** Assembly Gemm kernel */ - std::unique_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr }; - /** Optimised NEON kernel */ - std::unique_ptr<INEKernel> _optimised_kernel{ nullptr }; - /** Input A */ - const ITensor *_a - { - nullptr - }; - /** Input B */ - const ITensor *_b - { - nullptr - }; - /** Output */ - ITensor *_d{ nullptr }; - /** GEMM workspace */ - Tensor _workspace{}; - /** Pre-transpose tensor */ - Tensor _pretranspose{}; - /** Prepared flag */ - bool _is_prepared{ false }; - } _arm_gemm; /**< Fallback in case ACL doesn't have a function */ - MemoryGroup _memory_group; /**< Function memory group */ + /** Interface for the arm_gemm fallback */ + std::unique_ptr<IFallback> _arm_gemm; + MemoryGroup _memory_group; /**< Function memory group */ public: /** If supported create an ACL function else fallback to the arm_gemm function. 
* @@ -126,6 +90,19 @@ public: * @param[in] pretranspose_hint Can the B tensor can be pretransposed (ie shared across invocations)? */ void configure(const ITensor *a, const ITensor *b, ITensor *d, float alpha, float beta, bool pretranspose_hint); + + /** Indicates whether or not this function can be used to process the given parameters. + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[in] d Output tensor to store the result of matrix multiplication. Data type supported: same as @p a. + * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input D matrix before adding product. + * @param[in] pretranspose_hint Can the B tensor be pretransposed (ie shared across invocations)? + * + * @return a status. + */ + static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *d, float alpha, float beta, bool pretranspose_hint); /** Was the function successfully configured 
* * @return True if the function is configured and ready to run @@ -137,11 +114,5 @@ public: void run() override; }; -/** Float 32 assembly dispatch kernel */ -using NEGEMMAssemblyDispatchF32 = NEGEMMAssemblyDispatch<float, float>; -/** Uint 8 to Uint 32 assembly dispatch kernel */ -using NEGEMMAssemblyDispatchU8U32 = NEGEMMAssemblyDispatch<uint8_t, uint32_t>; -/** Int 8 to Int 32 assembly dispatch kernel */ -using NEGEMMAssemblyDispatchS8S32 = NEGEMMAssemblyDispatch<int8_t, int32_t>; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 1564b6c983..8f41462b0b 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -169,7 +169,7 @@ private: private: MemoryGroup _memory_group; - NEGEMMAssemblyDispatchF32 _asm_glue; + NEGEMMAssemblyDispatch _asm_glue; NEIm2ColKernel _input_im2col_kernel; NEGEMMInterleave4x4Kernel _input_interleave_kernel; NEConvolutionLayerReshapeWeights _reshape_weights; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h index b6672d7584..27be34d1f8 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h @@ -58,14 +58,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatchU8U32 _asm_glue_unsigned; - NEGEMMAssemblyDispatchS8S32 _asm_glue_signed; - std::unique_ptr<INEKernel> _mm_kernel; - std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; - std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; - Tensor _tmp_a; - Tensor _tmp_b; + MemoryGroup _memory_group; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr<INEKernel> _mm_kernel; + 
std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; + std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; + Tensor _tmp_a; + Tensor _tmp_b; }; } #endif /*__ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 96ac7bb7e0..3db76f423c 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -98,8 +98,7 @@ public: private: MemoryGroup _memory_group; - NEGEMMAssemblyDispatchU8U32 _asm_glue_unsigned; - NEGEMMAssemblyDispatchS8S32 _asm_glue_signed; + NEGEMMAssemblyDispatch _asm_glue; std::unique_ptr<INEKernel> _mm_kernel; std::unique_ptr<INEKernel> _mtx_a_reshape_kernel; std::unique_ptr<INEKernel> _mtx_b_reshape_kernel; diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h index 384fbf893b..5da63311e0 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h @@ -43,7 +43,7 @@ class ITensor; * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method ) * -# @ref NEWinogradLayerTransformInputKernel * -# @ref NEWinogradLayerTransformOutputKernel - * -# @ref NEGEMMAssemblyDispatchF32 + * -# @ref NEGEMMAssemblyDispatch * -# @ref CPPPermute (three times: weights, input and output) * * @note Some Winograd configurations (i.e. F(2x2, 5x5), F(4x4, 5x5)) are supported only with enable_fast_math = true @@ -103,7 +103,7 @@ public: private: MemoryGroup _memory_group; - NEGEMMAssemblyDispatchF32 _asm_glue; + NEGEMMAssemblyDispatch _asm_glue; std::unique_ptr<INEKernel> _transform_input_kernel; std::unique_ptr<INEKernel> _transform_output_kernel; std::unique_ptr<INEKernel> _transform_weights_kernel; |