diff options
| field | value | date |
|---|---|---|
| author | Gunes Bayir <gunes.bayir@arm.com> | 2024-07-02 15:45:01 +0100 |
| committer | Gunes Bayir <gunes.bayir@arm.com> | 2024-07-02 16:00:11 +0000 |
| commit | a3f238a44d9f306c77be0177f13d22ae3f3bcc57 (patch) | |
| tree | 44bf40fb59fb8c4452d65d25e3a967c035bc6863 /src/cpu/kernels | |
| parent | f92b0fffa0d32dc08340c1abfa1a7f09c6e53795 (diff) | |
| download | ComputeLibrary-a3f238a44d9f306c77be0177f13d22ae3f3bcc57.tar.gz | |
Revert "Update CPU kernels and add mixed sign GEMM support"
This reverts commits fc94f4d23abd4bc427b701f54ad85282e9ec7872 and 5d6fff041ade7eb44af0945867212f3979be3d3e (the latter is reverted as well because it fixes a build failure caused by the former).
Change-Id: I7d07fea8307e9a7033b30874bbb14ba9202b23d8
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11815
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Adnan AlSinan <adnan.alsinan@arm.com>
Diffstat (limited to 'src/cpu/kernels')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp | 4 |
| -rw-r--r-- | src/cpu/kernels/CpuKernelSelectionTypes.h | 2 |
| -rw-r--r-- | src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h | 8 |
| -rw-r--r-- | src/cpu/kernels/assembly/arm_gemm.hpp | 12 |
| -rw-r--r-- | src/cpu/kernels/assembly/convolution_parameters.hpp | 2 |
| -rw-r--r-- | src/cpu/kernels/assembly/gemm_common.hpp | 18 |
6 files changed, 20 insertions, 26 deletions
diff --git a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp index 87340e566e..5b88735e7a 100644 --- a/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp +++ b/src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp @@ -684,10 +684,6 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons DataType::U8); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(src0->data_type() == DataType::QASYMM8_SIGNED && - src1->data_type() == DataType::QASYMM8, - "QASYMM8_SIGNED input with QASYMM8 weights not supported"); - TensorShape in0_shape = src0->tensor_shape(); TensorShape in1_shape = src1->tensor_shape(); TensorShape out_shape = dst->tensor_shape(); diff --git a/src/cpu/kernels/CpuKernelSelectionTypes.h b/src/cpu/kernels/CpuKernelSelectionTypes.h index 03a474de53..7c1e4772a6 100644 --- a/src/cpu/kernels/CpuKernelSelectionTypes.h +++ b/src/cpu/kernels/CpuKernelSelectionTypes.h @@ -105,7 +105,7 @@ struct SoftmaxKernelDataTypeISASelectorData cpuinfo::CpuIsaInfo isa; bool is_log; int axis; - uint64_t sme2_vector_length; + unsigned long sme2_vector_length; }; // Selector pointer types diff --git a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h index 72fafca1bb..e2a27675b3 100644 --- a/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h +++ b/src/cpu/kernels/assembly/CpuGemmAssemblyWrapperKernel.h @@ -52,7 +52,7 @@ namespace kernel * * */ -template <typename TypeInput, typename TypeWeight, typename TypeOutput> +template <typename TypeInput, typename TypeOutput> class CpuGemmAssemblyWrapperKernel final : public INEKernel { public: @@ -101,7 +101,7 @@ public: * @param[in] kernel Pointer to an assembly kernel implementation. * @param[in] kernel_name_tag Tag to be attacehd to the kernel's name. 
*/ - void configure(arm_gemm::GemmCommon<TypeInput, TypeWeight, TypeOutput> *kernel, std::string kernel_name_tag) + void configure(arm_gemm::GemmCommon<TypeInput, TypeOutput> *kernel, std::string kernel_name_tag) { ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel))); _kernel = kernel; @@ -131,8 +131,8 @@ public: } private: - arm_gemm::GemmCommon<TypeInput, TypeWeight, TypeOutput> *_kernel; - std::string _name; + arm_gemm::GemmCommon<TypeInput, TypeOutput> *_kernel; + std::string _name; }; } // namespace kernel } // namespace cpu diff --git a/src/cpu/kernels/assembly/arm_gemm.hpp b/src/cpu/kernels/assembly/arm_gemm.hpp index cbc8be416e..941fed0ba8 100644 --- a/src/cpu/kernels/assembly/arm_gemm.hpp +++ b/src/cpu/kernels/assembly/arm_gemm.hpp @@ -277,8 +277,8 @@ struct Nothing { }; -template <typename Tlop, typename Trop, typename Tret> -using UniqueGemmCommon = std::unique_ptr<GemmCommon<Tlop, Trop, Tret>>; +template <typename Top, typename Tret> +using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret>>; /* Low level API calls. * These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. 
*/ @@ -288,13 +288,13 @@ using UniqueGemmCommon = std::unique_ptr<GemmCommon<Tlop, Trop, Tret>>; template <typename Top, typename Tret, class OutputStage = Nothing> KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage & = {}); -template <typename Tlop, typename Trop, typename Tret, class OutputStage = Nothing> -UniqueGemmCommon<Tlop, Trop, Tret> gemm(const GemmArgs &args, const OutputStage & = {}); +template <typename Top, typename Tret, class OutputStage = Nothing> +UniqueGemmCommon<Top, Tret> gemm(const GemmArgs &args, const OutputStage & = {}); -template <typename Tlop, typename Trop, typename Tret, class OutputStage = Nothing> +template <typename Top, typename Tret, class OutputStage = Nothing> std::vector<KernelDescription> get_compatible_kernels(const GemmArgs &args, const OutputStage & = {}); -template <typename Tlop, typename Trop, typename Tret, class OutputStage = Nothing> +template <typename Top, typename Tret, class OutputStage = Nothing> bool has_opt_gemm(WeightFormat &weight_format, const GemmArgs &args, const OutputStage & = {}); } // namespace arm_gemm diff --git a/src/cpu/kernels/assembly/convolution_parameters.hpp b/src/cpu/kernels/assembly/convolution_parameters.hpp index 09b73ca409..a6cf96344c 100644 --- a/src/cpu/kernels/assembly/convolution_parameters.hpp +++ b/src/cpu/kernels/assembly/convolution_parameters.hpp @@ -61,8 +61,6 @@ struct ConvolutionParameters int64_t output_stride_w; int64_t output_stride_h; // output_channels not included as they do not affect the input. 
- int64_t dilation_w; - int64_t dilation_h; int64_t padding_top; int64_t padding_left; float padding_value; diff --git a/src/cpu/kernels/assembly/gemm_common.hpp b/src/cpu/kernels/assembly/gemm_common.hpp index f693021fcb..45d1e43274 100644 --- a/src/cpu/kernels/assembly/gemm_common.hpp +++ b/src/cpu/kernels/assembly/gemm_common.hpp @@ -189,7 +189,7 @@ public: * 'set_arrays' to capture the provided arguments in protected class * members, as essentially any implementation will need these. */ -template <typename To, typename Tw, typename Tr> +template <typename To, typename Tr> class GemmCommon : public IGemmCommon { protected: @@ -197,7 +197,7 @@ protected: int _lda = 0; int _A_batch_stride = 0; int _A_multi_stride = 0; - const Tw *_Bptr = nullptr; + const To *_Bptr = nullptr; int _ldb = 0; int _B_multi_stride = 0; Tr *_Cptr = nullptr; @@ -214,7 +214,7 @@ public: const int lda, const int A_batch_stride, const int A_multi_stride, - const Tw *B, + const To *B, const int ldb, /* batches share B */ const int B_multi_stride, Tr *C, @@ -254,7 +254,7 @@ public: const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) override { - set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride, static_cast<const Tw *>(B), ldb, + set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride, static_cast<const To *>(B), ldb, B_multi_stride, static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride, static_cast<const Tr *>(bias), bias_multi_stride); } @@ -262,17 +262,17 @@ public: /*** "Pretransposed" interface ***/ /* Compute col sums over all columns */ - virtual void requantize_bias(void *, const Tw *, const int, const int){}; + virtual void requantize_bias(void *, const To *, const int, const int){}; /* Perform pretranspose - the void * passed in must remain allocated for the duration of any execute calls. 
*/ /* Arguments are: output buffer pointer, source pointer, source row stride, source multi stride */ - virtual void pretranspose_B_array(void *, const Tw *, const int, const int, bool){}; + virtual void pretranspose_B_array(void *, const To *, const int, const int, bool){}; /* Implementation of the void * overload which casts its arguments to the appropriate type. */ void pretranspose_B_array_generic( void *out, const void *in, const int row_stride, const int multi_stride, bool transposed) override { - pretranspose_B_array(out, static_cast<const Tw *>(in), row_stride, multi_stride, transposed); + pretranspose_B_array(out, static_cast<const To *>(in), row_stride, multi_stride, transposed); } /* Threaded versions of the above. @@ -280,7 +280,7 @@ public: * just calls the non-threaded functions to do the work. This is valid as with window size of 1 the only * legal values for start and end are 0 and 1 respectively. */ virtual void pretranspose_B_array_part( - void *out, const Tw *in, const int row_stride, const int multi_stride, bool transposed, size_t, size_t) + void *out, const To *in, const int row_stride, const int multi_stride, bool transposed, size_t, size_t) { pretranspose_B_array(out, in, row_stride, multi_stride, transposed); }; @@ -293,7 +293,7 @@ public: size_t start, size_t end) override { - pretranspose_B_array_part(out, static_cast<const Tw *>(in), row_stride, multi_stride, transposed, start, end); + pretranspose_B_array_part(out, static_cast<const To *>(in), row_stride, multi_stride, transposed, start, end); } /*** Indirect interface ***/ |