From a25d16c86f0d870408bc8b941aa755093417b0f0 Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Fri, 16 Nov 2018 11:33:12 +0000 Subject: COMPMID-1266 : Add support for FP16 in CLWinogradConvolutionLayer: 5x5 kernels. Introduced F32 accumulation for F16 winograd gemm and output transform. WinogradConvolution will be available for F16 only if the fast math flag is enabled. Change-Id: I215593c205236a0f9669218437bb40b184ec6a4f --- utils/Utils.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'utils') diff --git a/utils/Utils.h b/utils/Utils.h index 130e1f72fe..92ab1a30b9 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -181,6 +181,8 @@ inline std::string get_typestring(DataType data_type) return endianness + "u" + support::cpp11::to_string(sizeof(uint64_t)); case DataType::S64: return endianness + "i" + support::cpp11::to_string(sizeof(int64_t)); + case DataType::F16: + return endianness + "f" + support::cpp11::to_string(sizeof(half)); case DataType::F32: return endianness + "f" + support::cpp11::to_string(sizeof(float)); case DataType::F64: @@ -275,6 +277,43 @@ inline void unmap(GCTensor &tensor) } #endif /* ARM_COMPUTE_GC */ +/** Specialized class to generate random non-zero FP16 values. 
+ * uniform_real_distribution generates values that get rounded off to zero, causing + * differences between ACL and reference implementation +*/ +class uniform_real_distribution_fp16 +{ + half min{ 0.0f }, max{ 0.0f }; + std::uniform_real_distribution neg{ min, -0.3f }; + std::uniform_real_distribution pos{ 0.3f, max }; + std::uniform_int_distribution sign_picker{ 0, 1 }; + +public: + using result_type = half; + /** Constructor + * + * @param[in] a Minimum value of the distribution + * @param[in] b Maximum value of the distribution + */ + explicit uniform_real_distribution_fp16(half a = half(0.0), half b = half(1.0)) + : min(a), max(b) + { + } + + /** () operator to generate next value + * + * @param[in] gen a uniform random bit generator object + */ + half operator()(std::mt19937 &gen) + { + if(sign_picker(gen)) + { + return (half)neg(gen); + } + return (half)pos(gen); + } +}; + /** Numpy data loader */ class NPYLoader { @@ -416,6 +455,7 @@ public: case arm_compute::DataType::QASYMM8: case arm_compute::DataType::S32: case arm_compute::DataType::F32: + case arm_compute::DataType::F16: { // Read data if(!are_layouts_different && !_fortran_order && tensor.info()->padding().empty()) @@ -699,6 +739,18 @@ void fill_random_tensor(T &tensor, float lower_bound, float upper_bound) switch(tensor.info()->data_type()) { + case arm_compute::DataType::F16: + { + std::uniform_real_distribution dist(lower_bound, upper_bound); + + execute_window_loop(window, [&](const Coordinates & id) + { + *reinterpret_cast(it.ptr()) = (half)dist(gen); + }, + it); + + break; + } case arm_compute::DataType::F32: { std::uniform_real_distribution dist(lower_bound, upper_bound); -- cgit v1.2.1