diff options
author | James Ward <james.ward@arm.com> | 2022-11-15 11:36:47 +0000 |
---|---|---|
committer | Eric Kunze <eric.kunze@arm.com> | 2022-11-29 15:55:26 +0000 |
commit | ee2566914d3476b8103b88915f3b81bda8490b44 (patch) | |
tree | 0d0dd56adafb3a65d896a192eeb736200eec8f06 /reference_model/src/arith_util.h | |
parent | 542dd3b8da39440026fa9e809eebd0a3b79cf95d (diff) | |
download | reference_model-ee2566914d3476b8103b88915f3b81bda8490b44.tar.gz |
FP16 improvements
* Update FP16 resize to newest spec version
* Correct casting to fp16 for graphs of >1 ops
Change-Id: Iedff9a71eb7f72948b3c00a635bb0fd07d414bcd
Signed-off-by: James Ward <james.ward@arm.com>
Diffstat (limited to 'reference_model/src/arith_util.h')
-rw-r--r-- | reference_model/src/arith_util.h | 15 |
1 file changed, 10 insertions, 5 deletions
diff --git a/reference_model/src/arith_util.h b/reference_model/src/arith_util.h index 33bdeed..a75d7a3 100644 --- a/reference_model/src/arith_util.h +++ b/reference_model/src/arith_util.h @@ -30,17 +30,18 @@ #include <fenv.h> #include <math.h> #define __STDC_LIMIT_MACROS //enable min/max of plain data type -#include "func_debug.h" #include "func_config.h" +#include "func_debug.h" +#include "half.hpp" #include "inttypes.h" #include "tosa_generated.h" +#include <Eigen/Core> +#include <bitset> #include <cassert> #include <iostream> #include <limits> #include <stdint.h> #include <typeinfo> -#include <Eigen/Core> -#include <bitset> using namespace tosa; using namespace std; @@ -269,8 +270,12 @@ float fpTrunc(float f_in) truncateFloatToBFloat(&f_in, 1); break; case DType_FP16: - // TODO(jw): implement FP16 truncate function (no-op placeholder for now) - break; + // Cast to temporary float16 value before casting back to float32 + { + half_float::half h = half_float::half_cast<half_float::half, float>(f_in); + f_in = half_float::half_cast<float, half_float::half>(h); + break; + } case DType_FP32: // No-op for fp32 break; |