From ee2566914d3476b8103b88915f3b81bda8490b44 Mon Sep 17 00:00:00 2001 From: James Ward Date: Tue, 15 Nov 2022 11:36:47 +0000 Subject: FP16 improvements * Update FP16 resize to newest spec version * Correct casting to fp16 for graphs of >1 ops Change-Id: Iedff9a71eb7f72948b3c00a635bb0fd07d414bcd Signed-off-by: James Ward --- reference_model/src/arith_util.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'reference_model/src/arith_util.h') diff --git a/reference_model/src/arith_util.h b/reference_model/src/arith_util.h index 33bdeed..a75d7a3 100644 --- a/reference_model/src/arith_util.h +++ b/reference_model/src/arith_util.h @@ -30,17 +30,18 @@ #include #include #define __STDC_LIMIT_MACROS //enable min/max of plain data type -#include "func_debug.h" #include "func_config.h" +#include "func_debug.h" +#include "half.hpp" #include "inttypes.h" #include "tosa_generated.h" +#include +#include #include #include #include #include #include -#include -#include using namespace tosa; using namespace std; @@ -269,8 +270,12 @@ float fpTrunc(float f_in) truncateFloatToBFloat(&f_in, 1); break; case DType_FP16: - // TODO(jw): implement FP16 truncate function (no-op placeholder for now) - break; + // Cast to temporary float16 value before casting back to float32 + { + half_float::half h = half_float::half_cast<half_float::half, float>(f_in); + f_in = half_float::half_cast<float, half_float::half>(h); + break; + } case DType_FP32: // No-op for fp32 break; -- cgit v1.2.1