aboutsummaryrefslogtreecommitdiff
path: root/reference_model/src/ops/type_conversion.cc
diff options
context:
space:
mode:
Diffstat (limited to 'reference_model/src/ops/type_conversion.cc')
-rw-r--r--reference_model/src/ops/type_conversion.cc79
1 files changed, 73 insertions, 6 deletions
diff --git a/reference_model/src/ops/type_conversion.cc b/reference_model/src/ops/type_conversion.cc
index f266675..dbedbad 100644
--- a/reference_model/src/ops/type_conversion.cc
+++ b/reference_model/src/ops/type_conversion.cc
@@ -15,6 +15,7 @@
#include "type_conversion.h"
#include "quant_util.h"
+#include "arith_util.h"
#include "template_types.h"
#include <cmath>
#include "half.hpp"
@@ -307,30 +308,88 @@ CastHelper<DType_BOOL, OutDtype>::CastHelper()
template <DType InDtype>
CastHelper<InDtype, DType_FP16>::CastHelper()
{
+ // Integer data converted to fp16 (stored as fp32)
fcn = [](InEigenType in) -> float {
- half_float::half out = half_float::half_cast<half_float::half, InEigenType>(in); // Cast to half_float
- return half_float::half_cast<float, half_float::half>(out); // Cast to float (underlying FP16 EigenType)
+ half_float::half h = half_float::half(in);
+ float out = half_float::half_cast<float, half_float::half>(h);
+ return out;
+ };
+}
+
+CastHelper<DType_FP32, DType_FP16>::CastHelper()
+{
+ // fp32 data converted to fp16 (stored as fp32)
+ fcn = [](float in) -> float {
+ float out = fpTrunc<DType_FP16>(in); // truncate required for conversion from higher precision
+ return out;
+ };
+}
+
+template <DType InDtype>
+CastHelper<InDtype, DType_BF16>::CastHelper()
+{
+ // Integer data converted to bf16 (stored as fp32)
+ fcn = [](InEigenType in) -> float {
+ float out = (float)in; // default cast to float is round_to_nearest_float()
+ return out;
+ };
+}
+
+CastHelper<DType_FP32, DType_BF16>::CastHelper()
+{
+ // fp32 data converted to bf16 (stored as fp32)
+ fcn = [](float in) -> float {
+ return fpTrunc<DType_BF16>(in); // truncate required for conversions from higher precision
};
}
template <DType OutDtype>
CastHelper<DType_FP16, OutDtype>::CastHelper()
{
- // Assuming InEigenType = float.
+ // fp16 data (stored as fp32) converted to integer
fcn = [](float in) -> OutEigenType {
- // Perform initial rounding in half-precision then cast back to float
- half_float::half h = half_float::half_cast<half_float::half, float>(in);
+ // Cast from float representation back to half_float before rounding
+ half_float::half h = half_float::half(in);
h = std::round(h);
- OutEigenType out = half_float::half_cast<float, half_float::half>(h);
+ OutEigenType out = half_float::half_cast<OutEigenType, half_float::half>(h);
out = std::max<OutEigenType>(out, OutMin);
out = std::min<OutEigenType>(out, OutMax);
return out;
};
}
+CastHelper<DType_FP16, DType_FP32>::CastHelper()
+{
+ // No-op since fp16 values treated internally as their fp32 representation
+ fcn = [](float in) -> OutEigenType {
+ return in;
+ };
+}
+
+template <DType OutDtype>
+CastHelper<DType_BF16, OutDtype>::CastHelper()
+{
+ // bf16 data (stored as fp32) converted to integer
+ fcn = [](float in) -> OutEigenType {
+ OutEigenType out = std::round(in);
+ out = std::max<OutEigenType>(out, OutMin);
+ out = std::min<OutEigenType>(out, OutMax);
+ return out;
+ };
+}
+
+CastHelper<DType_BF16, DType_FP32>::CastHelper()
+{
+ // No-op since bf16 values treated as truncated fp32 internally
+ fcn = [](InEigenType in) -> OutEigenType {
+ return in;
+ };
+}
+
template <DType InDtype>
CastHelper<InDtype, DType_FP32>::CastHelper()
{
+ // Integer data converted to fp32
fcn = [](InEigenType in) -> float {
float out = (OutEigenType)in; // default cast to float is round_to_nearest_float()
return out;
@@ -340,6 +399,7 @@ CastHelper<InDtype, DType_FP32>::CastHelper()
template <DType OutDtype>
CastHelper<DType_FP32, OutDtype>::CastHelper()
{
+ // fp32 data converted to integer
fcn = [](float in) -> OutEigenType {
OutEigenType out = std::round(in);
out = std::max<OutEigenType>(out, OutMin);
@@ -356,26 +416,33 @@ DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT8, BOOL);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT8, INT16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT8, INT32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT8, FP16);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT8, BF16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT8, FP32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT16, BOOL);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT16, INT8);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT16, INT32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT16, FP16);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT16, BF16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT16, FP32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT32, BOOL);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT32, INT8);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT32, INT16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT32, FP16);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT32, BF16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, INT32, FP32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP16, INT8);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP16, INT16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP16, INT32);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP16, FP32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, BF16, INT8);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, BF16, INT16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, BF16, INT32);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, BF16, FP32);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP32, INT8);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP32, INT16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP32, INT32);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP32, FP16);
+DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpCast, FP32, BF16);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpRescale, INT8, INT8);
DEF_INSTANTIATE_RANK0_6_ONE_RANK_TWO_TYPE(OpRescale, INT8, INT16);