aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/attribute.def5
-rw-r--r--include/cfloat.h44
-rw-r--r--include/numpy_utils.h17
-rw-r--r--include/tosa_generated.h56
-rw-r--r--include/tosa_serialization_handler.h14
-rw-r--r--python/serializer/tosa_serializer.py50
-rw-r--r--python/tosa/ClampAttribute.py4
-rw-r--r--python/tosa/ConvAttribute.py6
-rw-r--r--python/tosa/CustomAttribute.py2
-rw-r--r--python/tosa/PadAttribute.py2
-rw-r--r--python/tosa/PoolAttribute.py6
-rw-r--r--python/tosa/ResizeAttribute.py117
-rw-r--r--python/tosa/TableAttribute.py2
-rw-r--r--python/tosa/TosaBasicBlock.py8
-rw-r--r--python/tosa/TosaGraph.py2
-rw-r--r--python/tosa/TosaOperator.py4
-rw-r--r--python/tosa/TosaRegion.py2
-rw-r--r--python/tosa/TosaTensor.py4
-rw-r--r--python/tosa/TransposeAttribute.py2
-rw-r--r--python/tosa/TransposeConvAttribute.py4
-rw-r--r--schema/tosa.fbs6
-rw-r--r--src/numpy_utils.cpp29
-rw-r--r--src/tosa_serialization_handler.cpp68
m---------third_party/flatbuffers0
24 files changed, 161 insertions, 293 deletions
diff --git a/include/attribute.def b/include/attribute.def
index 0e97629..52d5179 100644
--- a/include/attribute.def
+++ b/include/attribute.def
@@ -57,10 +57,7 @@ DEF_ATTRIBUTE(Pad, 1,
DEF_ATTRIBUTE(Axis, 1,
int32_t, S, axis)
-DEF_ATTRIBUTE(Resize, 4,
- int16_t, V, scale,
- int16_t, V, offset,
- int16_t, V, border,
+DEF_ATTRIBUTE(Resize, 1,
ResizeMode, S, mode)
DEF_ATTRIBUTE(Clamp, 2,
diff --git a/include/cfloat.h b/include/cfloat.h
index 0cf4896..cbbe09a 100644
--- a/include/cfloat.h
+++ b/include/cfloat.h
@@ -211,10 +211,33 @@ public:
if (in.is_nan() || in.is_infinity())
{
+ // The mapping of infinity to the destination type depends upon
+ // the overflow mode and the features of the destination type.
+ // OVERFLOW mode is the "expected" behaviour, in which exception
+ // values (NaN and infinity) map to themselves in the
+ // destination type (assuming they exist). In SATURATION mode,
+ // infinity maps to the largest absolute value of the
+ // destination type _even if_ an infinity encoding is available.
+ // See the FP8 specification document.
+ //
+ // By default, exceptional values are encoded with an all-1
+ // exponent field.
new_exponent_bits = (UINT64_C(1) << out_exp_bits) - 1;
if (in.is_nan())
{
+ // NaN always maps to NaN if it's available.
+ //
+ // NB: if the type has both NaN AND Infinity support, then
+ // the entirety of the significand can be used to encode
+ // different values of NaN (excepting significand = 0,
+ // which is reserved for infinity). This makes it possible
+ // to encode both quiet and signalling varieties.
+ // Generally, the LSB of the significand represents "not
+ // quiet". However, when there is only 1 NaN encoding
+ // (which is generally the case when infinity is not
+ // supported), then there cannot be separate quiet and
+ // signalling varieties of NaN.
if constexpr (out_type::has_inf)
{
// Copy across the `not_quiet bit`; set the LSB.
@@ -228,17 +251,18 @@ public:
new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
}
}
- else if constexpr (out_type::has_inf && overflow_mode == OverflowMode::Saturate)
+ else if constexpr (overflow_mode == OverflowMode::Saturate)
{
- new_exponent_bits -= 1;
- new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
- }
- else if constexpr (!out_type::has_inf && overflow_mode == OverflowMode::Saturate)
- {
- new_significand = (UINT64_C(1) << out_type::n_significand_bits) - (out_type::has_nan ? 2 : 1);
+ // In SATURATE mode, infinity in the input maps to the
+ // largest absolute value in the output type; even if
+ // infinity is available. This is in compliance with Table 3
+ // of the FP8 specification.
+ return out_type::max(sign_bit);
}
else if constexpr (!out_type::has_inf && overflow_mode == OverflowMode::Overflow)
{
+ // In OVERFLOW mode, infinities in the input type map to NaN
+ // in the output type, if infinity is not available.
new_significand = (UINT64_C(1) << out_type::n_significand_bits) - 1;
}
}
@@ -492,20 +516,20 @@ public:
{
// Where we have NaN and Infinity, exponents all `1` corresponds
// to some of these values.
- return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 2, (UINT64_C(1) << n_significand_bits) - 1);
+ return from_bits(sign, (UINT64_C(1) << n_exponent_bits) - 2, (UINT64_C(1) << n_significand_bits) - 1);
}
else if constexpr (has_nan || has_inf)
{
// Where we have either NaN or infinity (but not both),
// exponents all `1` AND significand all `1` corresponds to the
// special value.
- return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 2);
+ return from_bits(sign, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 2);
}
else
{
// With no special values to encode, the maximum value is
// encoded as all `1`s.
- return from_bits(false, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 1);
+ return from_bits(sign, (UINT64_C(1) << n_exponent_bits) - 1, (UINT64_C(1) << n_significand_bits) - 1);
}
}
diff --git a/include/numpy_utils.h b/include/numpy_utils.h
index 60cf77e..ade2f2d 100644
--- a/include/numpy_utils.h
+++ b/include/numpy_utils.h
@@ -24,8 +24,13 @@
#include <cstring>
#include <vector>
+#include "cfloat.h"
#include "half.hpp"
+using bf16 = ct::cfloat<int16_t, 8, true, true, true>;
+using fp8e4m3 = ct::cfloat<int8_t, 4, true, true, false>;
+using fp8e5m2 = ct::cfloat<int8_t, 5, true, true, true>;
+
class NumpyUtilities
{
public:
@@ -85,6 +90,18 @@ public:
{
return "'<f2'";
}
+ if (std::is_same<T, bf16>::value)
+ {
+ return "'<V2'";
+ }
+ if (std::is_same<T, fp8e4m3>::value)
+ {
+ return "'<V1'";
+ }
+ if (std::is_same<T, fp8e5m2>::value)
+ {
+ return "'<f1'";
+ }
assert(false && "unsupported Dtype");
};
diff --git a/include/tosa_generated.h b/include/tosa_generated.h
index 1b5e164..61bc465 100644
--- a/include/tosa_generated.h
+++ b/include/tosa_generated.h
@@ -8,9 +8,9 @@
// Ensure the included flatbuffers.h is the same version as when this file was
// generated, otherwise it may not be compatible.
-static_assert(FLATBUFFERS_VERSION_MAJOR == 23 &&
- FLATBUFFERS_VERSION_MINOR == 5 &&
- FLATBUFFERS_VERSION_REVISION == 26,
+static_assert(FLATBUFFERS_VERSION_MAJOR == 24 &&
+ FLATBUFFERS_VERSION_MINOR == 3 &&
+ FLATBUFFERS_VERSION_REVISION == 7,
"Non-compatible flatbuffers version included");
namespace tosa {
@@ -1087,31 +1087,13 @@ inline ::flatbuffers::Offset<AxisAttribute> CreateAxisAttribute(
struct ResizeAttribute FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
typedef ResizeAttributeBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
- VT_SCALE = 4,
- VT_OFFSET = 6,
- VT_BORDER = 8,
VT_MODE = 10
};
- const ::flatbuffers::Vector<int16_t> *scale() const {
- return GetPointer<const ::flatbuffers::Vector<int16_t> *>(VT_SCALE);
- }
- const ::flatbuffers::Vector<int16_t> *offset() const {
- return GetPointer<const ::flatbuffers::Vector<int16_t> *>(VT_OFFSET);
- }
- const ::flatbuffers::Vector<int16_t> *border() const {
- return GetPointer<const ::flatbuffers::Vector<int16_t> *>(VT_BORDER);
- }
tosa::ResizeMode mode() const {
return static_cast<tosa::ResizeMode>(GetField<uint32_t>(VT_MODE, 0));
}
bool Verify(::flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
- VerifyOffset(verifier, VT_SCALE) &&
- verifier.VerifyVector(scale()) &&
- VerifyOffset(verifier, VT_OFFSET) &&
- verifier.VerifyVector(offset()) &&
- VerifyOffset(verifier, VT_BORDER) &&
- verifier.VerifyVector(border()) &&
VerifyField<uint32_t>(verifier, VT_MODE, 4) &&
verifier.EndTable();
}
@@ -1121,15 +1103,6 @@ struct ResizeAttributeBuilder {
typedef ResizeAttribute Table;
::flatbuffers::FlatBufferBuilder &fbb_;
::flatbuffers::uoffset_t start_;
- void add_scale(::flatbuffers::Offset<::flatbuffers::Vector<int16_t>> scale) {
- fbb_.AddOffset(ResizeAttribute::VT_SCALE, scale);
- }
- void add_offset(::flatbuffers::Offset<::flatbuffers::Vector<int16_t>> offset) {
- fbb_.AddOffset(ResizeAttribute::VT_OFFSET, offset);
- }
- void add_border(::flatbuffers::Offset<::flatbuffers::Vector<int16_t>> border) {
- fbb_.AddOffset(ResizeAttribute::VT_BORDER, border);
- }
void add_mode(tosa::ResizeMode mode) {
fbb_.AddElement<uint32_t>(ResizeAttribute::VT_MODE, static_cast<uint32_t>(mode), 0);
}
@@ -1146,35 +1119,12 @@ struct ResizeAttributeBuilder {
inline ::flatbuffers::Offset<ResizeAttribute> CreateResizeAttribute(
::flatbuffers::FlatBufferBuilder &_fbb,
- ::flatbuffers::Offset<::flatbuffers::Vector<int16_t>> scale = 0,
- ::flatbuffers::Offset<::flatbuffers::Vector<int16_t>> offset = 0,
- ::flatbuffers::Offset<::flatbuffers::Vector<int16_t>> border = 0,
tosa::ResizeMode mode = tosa::ResizeMode_UNKNOWN) {
ResizeAttributeBuilder builder_(_fbb);
builder_.add_mode(mode);
- builder_.add_border(border);
- builder_.add_offset(offset);
- builder_.add_scale(scale);
return builder_.Finish();
}
-inline ::flatbuffers::Offset<ResizeAttribute> CreateResizeAttributeDirect(
- ::flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<int16_t> *scale = nullptr,
- const std::vector<int16_t> *offset = nullptr,
- const std::vector<int16_t> *border = nullptr,
- tosa::ResizeMode mode = tosa::ResizeMode_UNKNOWN) {
- auto scale__ = scale ? _fbb.CreateVector<int16_t>(*scale) : 0;
- auto offset__ = offset ? _fbb.CreateVector<int16_t>(*offset) : 0;
- auto border__ = border ? _fbb.CreateVector<int16_t>(*border) : 0;
- return tosa::CreateResizeAttribute(
- _fbb,
- scale__,
- offset__,
- border__,
- mode);
-}
-
struct ClampAttribute FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
typedef ClampAttributeBuilder Builder;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
diff --git a/include/tosa_serialization_handler.h b/include/tosa_serialization_handler.h
index 91b1a9d..c09a47d 100644
--- a/include/tosa_serialization_handler.h
+++ b/include/tosa_serialization_handler.h
@@ -28,7 +28,7 @@
// Keep version number in sync with the version default value with schema/tosa.fbs
#define TOSA_VERSION_MAJOR 1
-#define TOSA_VERSION_MINOR 0
+#define TOSA_VERSION_MINOR 1
#define TOSA_VERSION_PATCH 0
#define TOSA_VERSION_DRAFT true
#define TENSOR_BUFFER_FORCE_ALIGNMENT 8
@@ -412,9 +412,9 @@ public:
tosa_err_t LoadFileSchema(const char* schema_filename);
// data format conversion. little-endian.
- static tosa_err_t ConvertBF16toU8(const std::vector<float>& in, std::vector<uint8_t>& out);
- static tosa_err_t ConvertFP8E4M3toU8(const std::vector<float>& in, std::vector<uint8_t>& out);
- static tosa_err_t ConvertFP8E5M2toU8(const std::vector<float>& in, std::vector<uint8_t>& out);
+ static tosa_err_t ConvertBF16toU8(const std::vector<bf16>& in, std::vector<uint8_t>& out);
+ static tosa_err_t ConvertFP8E4M3toU8(const std::vector<fp8e4m3>& in, std::vector<uint8_t>& out);
+ static tosa_err_t ConvertFP8E5M2toU8(const std::vector<fp8e5m2>& in, std::vector<uint8_t>& out);
static tosa_err_t ConvertF16toU8(const std::vector<float>& in, std::vector<uint8_t>& out);
static tosa_err_t ConvertF32toU8(const std::vector<float>& in, std::vector<uint8_t>& out);
static tosa_err_t ConvertI64toU8(const std::vector<int64_t>& in, std::vector<uint8_t>& out);
@@ -425,9 +425,9 @@ public:
static tosa_err_t ConvertI4toU8(const std::vector<int8_t>& in, std::vector<uint8_t>& out);
static tosa_err_t ConvertBooltoU8(const std::vector<bool>& in, std::vector<uint8_t>& out);
- static tosa_err_t ConvertU8toBF16(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<float>& out);
- static tosa_err_t ConvertU8toFP8E4M3(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<float>& out);
- static tosa_err_t ConvertU8toFP8E5M2(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<float>& out);
+ static tosa_err_t ConvertU8toBF16(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<bf16>& out);
+ static tosa_err_t ConvertU8toFP8E4M3(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<fp8e4m3>& out);
+ static tosa_err_t ConvertU8toFP8E5M2(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<fp8e5m2>& out);
static tosa_err_t
ConvertU8toF16(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<half_float::half>& out);
static tosa_err_t ConvertU8toF32(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<float>& out);
diff --git a/python/serializer/tosa_serializer.py b/python/serializer/tosa_serializer.py
index d0445ae..c417fce 100644
--- a/python/serializer/tosa_serializer.py
+++ b/python/serializer/tosa_serializer.py
@@ -17,7 +17,7 @@ import serializer.tosa_serializer as ts
import json
import flatbuffers
import numpy as np
-import struct
+from ml_dtypes import bfloat16, float8_e4m3fn, float8_e5m2
from enum import IntEnum, unique
from tosa import (
TosaGraph,
@@ -32,7 +32,7 @@ import tosa.Op as TosaOp
# Keep version number in sync with the version default value with schema/tosa.fbs
TOSA_VERSION_MAJOR = 1
-TOSA_VERSION_MINOR = 0
+TOSA_VERSION_MINOR = 1
TOSA_VERSION_PATCH = 0
TOSA_VERSION_DRAFT = True
TOSA_VERSION = [
@@ -225,15 +225,12 @@ class TosaSerializerAttribute(TosaSerializerUnion):
self.ints.append((a.AddAxis, axis))
- def ResizeAttribute(self, scale, offset, border, mode):
+ def ResizeAttribute(self, mode):
from tosa import ResizeAttribute as a, Attribute
self.utype = Attribute.Attribute().ResizeAttribute
self.optFcns = (a.Start, a.End)
- self.int16vecs.append((a.AddScale, scale))
- self.int16vecs.append((a.AddOffset, offset))
- self.int16vecs.append((a.AddBorder, border))
self.ints.append((a.AddMode, mode))
def ClampAttribute(self, serializer_builder, min_val_as_bytes, max_val_as_bytes):
@@ -392,13 +389,14 @@ class TosaSerializerTensor:
self.shape = shape
self.dtype = dtype
- if (
- dtype == DType.FP32
- or dtype == DType.BF16
- or dtype == DType.FP8E4M3
- or dtype == DType.FP8E5M2
- ):
+ if dtype == DType.FP32:
fntype = np.float32
+ elif dtype == DType.BF16:
+ fntype = bfloat16
+ elif dtype == DType.FP8E4M3:
+ fntype = float8_e4m3fn
+ elif dtype == DType.FP8E5M2:
+ fntype = float8_e5m2
elif dtype == DType.FP16:
fntype = np.float16
else:
@@ -943,35 +941,19 @@ class TosaSerializer:
np_arr = np.array(data, dtype=np.float16)
u8_data.extend(np_arr.view(np.uint8))
elif dtype == DType.FP32:
- # for val in data:
- # b = struct.pack("!f", val)
- # u8_data.extend([b[3], b[2], b[1], b[0]])
np_arr = np.array(data, dtype=np.float32)
u8_data.extend(np_arr.view(np.uint8))
elif dtype == DType.BF16:
- for val in data:
- # convert val to little endian byte arrays b
- b = struct.pack("<f", val)
- # val => [ b[3], b[2], b[1], b[0] ]
- # keep only most significant 2 bytes for bf16
- # in little endian ordering
- u8_data.extend([b[2], b[3]])
+ np_arr = np.array(data, dtype=bfloat16)
+ u8_data.extend(np_arr.view(np.uint8))
elif dtype == DType.FP8E4M3:
for val in data:
- # convert val to fp8_bits then to single byte
- f32_as_int = struct.unpack(">L", struct.pack(">f", val))[0]
- f32_bits = f"{f32_as_int:032b}"
- fp8_bits = f32_bits[0] + f32_bits[1:5] + f32_bits[9:12]
- fp8_bytes = int(fp8_bits, 2).to_bytes(1, byteorder="little")
- u8_data.extend(fp8_bytes)
+ val_f8 = np.array(val).astype(float8_e4m3fn).view(np.uint8)
+ u8_data.append(val_f8)
elif dtype == DType.FP8E5M2:
for val in data:
- # convert val to fp8_bits then to single byte
- f32_as_int = struct.unpack(">L", struct.pack(">f", val))[0]
- f32_bits = f"{f32_as_int:032b}"
- fp8_bits = f32_bits[0] + f32_bits[1:6] + f32_bits[9:11]
- fp8_bytes = int(fp8_bits, 2).to_bytes(1, byteorder="little")
- u8_data.extend(fp8_bytes)
+ val_f8 = np.array(val).astype(float8_e5m2).view(np.uint8)
+ u8_data.append(val_f8)
elif dtype == TosaDType.DType:
# Serialize DType enum data as uint8 bytes
for val in data:
diff --git a/python/tosa/ClampAttribute.py b/python/tosa/ClampAttribute.py
index 6a41498..40254ec 100644
--- a/python/tosa/ClampAttribute.py
+++ b/python/tosa/ClampAttribute.py
@@ -97,7 +97,7 @@ def AddMinVal(builder, minVal):
def ClampAttributeStartMinValVector(builder, numElems):
return builder.StartVector(1, numElems, 1)
-def StartMinValVector(builder, numElems: int) -> int:
+def StartMinValVector(builder, numElems):
return ClampAttributeStartMinValVector(builder, numElems)
def ClampAttributeAddMaxVal(builder, maxVal):
@@ -109,7 +109,7 @@ def AddMaxVal(builder, maxVal):
def ClampAttributeStartMaxValVector(builder, numElems):
return builder.StartVector(1, numElems, 1)
-def StartMaxValVector(builder, numElems: int) -> int:
+def StartMaxValVector(builder, numElems):
return ClampAttributeStartMaxValVector(builder, numElems)
def ClampAttributeEnd(builder):
diff --git a/python/tosa/ConvAttribute.py b/python/tosa/ConvAttribute.py
index dfa75dc..1deca59 100644
--- a/python/tosa/ConvAttribute.py
+++ b/python/tosa/ConvAttribute.py
@@ -152,7 +152,7 @@ def AddPad(builder, pad):
def ConvAttributeStartPadVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartPadVector(builder, numElems: int) -> int:
+def StartPadVector(builder, numElems):
return ConvAttributeStartPadVector(builder, numElems)
def ConvAttributeAddStride(builder, stride):
@@ -164,7 +164,7 @@ def AddStride(builder, stride):
def ConvAttributeStartStrideVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartStrideVector(builder, numElems: int) -> int:
+def StartStrideVector(builder, numElems):
return ConvAttributeStartStrideVector(builder, numElems)
def ConvAttributeAddDilation(builder, dilation):
@@ -176,7 +176,7 @@ def AddDilation(builder, dilation):
def ConvAttributeStartDilationVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartDilationVector(builder, numElems: int) -> int:
+def StartDilationVector(builder, numElems):
return ConvAttributeStartDilationVector(builder, numElems)
def ConvAttributeAddInputZp(builder, inputZp):
diff --git a/python/tosa/CustomAttribute.py b/python/tosa/CustomAttribute.py
index db35dca..4c1c477 100644
--- a/python/tosa/CustomAttribute.py
+++ b/python/tosa/CustomAttribute.py
@@ -96,7 +96,7 @@ def AddImplementationAttrs(builder, implementationAttrs):
def CustomAttributeStartImplementationAttrsVector(builder, numElems):
return builder.StartVector(1, numElems, 1)
-def StartImplementationAttrsVector(builder, numElems: int) -> int:
+def StartImplementationAttrsVector(builder, numElems):
return CustomAttributeStartImplementationAttrsVector(builder, numElems)
def CustomAttributeEnd(builder):
diff --git a/python/tosa/PadAttribute.py b/python/tosa/PadAttribute.py
index 301bf17..8adf9f7 100644
--- a/python/tosa/PadAttribute.py
+++ b/python/tosa/PadAttribute.py
@@ -70,7 +70,7 @@ def AddPadConst(builder, padConst):
def PadAttributeStartPadConstVector(builder, numElems):
return builder.StartVector(1, numElems, 1)
-def StartPadConstVector(builder, numElems: int) -> int:
+def StartPadConstVector(builder, numElems):
return PadAttributeStartPadConstVector(builder, numElems)
def PadAttributeEnd(builder):
diff --git a/python/tosa/PoolAttribute.py b/python/tosa/PoolAttribute.py
index c13e038..831d43b 100644
--- a/python/tosa/PoolAttribute.py
+++ b/python/tosa/PoolAttribute.py
@@ -145,7 +145,7 @@ def AddPad(builder, pad):
def PoolAttributeStartPadVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartPadVector(builder, numElems: int) -> int:
+def StartPadVector(builder, numElems):
return PoolAttributeStartPadVector(builder, numElems)
def PoolAttributeAddKernel(builder, kernel):
@@ -157,7 +157,7 @@ def AddKernel(builder, kernel):
def PoolAttributeStartKernelVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartKernelVector(builder, numElems: int) -> int:
+def StartKernelVector(builder, numElems):
return PoolAttributeStartKernelVector(builder, numElems)
def PoolAttributeAddStride(builder, stride):
@@ -169,7 +169,7 @@ def AddStride(builder, stride):
def PoolAttributeStartStrideVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartStrideVector(builder, numElems: int) -> int:
+def StartStrideVector(builder, numElems):
return PoolAttributeStartStrideVector(builder, numElems)
def PoolAttributeAddInputZp(builder, inputZp):
diff --git a/python/tosa/ResizeAttribute.py b/python/tosa/ResizeAttribute.py
index 96bfa56..f2a6992 100644
--- a/python/tosa/ResizeAttribute.py
+++ b/python/tosa/ResizeAttribute.py
@@ -29,87 +29,6 @@ class ResizeAttribute(object):
self._tab = flatbuffers.table.Table(buf, pos)
# ResizeAttribute
- def Scale(self, j):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
- if o != 0:
- a = self._tab.Vector(o)
- return self._tab.Get(flatbuffers.number_types.Int16Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 2))
- return 0
-
- # ResizeAttribute
- def ScaleAsNumpy(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
- if o != 0:
- return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int16Flags, o)
- return 0
-
- # ResizeAttribute
- def ScaleLength(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
- if o != 0:
- return self._tab.VectorLen(o)
- return 0
-
- # ResizeAttribute
- def ScaleIsNone(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
- return o == 0
-
- # ResizeAttribute
- def Offset(self, j):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
- if o != 0:
- a = self._tab.Vector(o)
- return self._tab.Get(flatbuffers.number_types.Int16Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 2))
- return 0
-
- # ResizeAttribute
- def OffsetAsNumpy(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
- if o != 0:
- return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int16Flags, o)
- return 0
-
- # ResizeAttribute
- def OffsetLength(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
- if o != 0:
- return self._tab.VectorLen(o)
- return 0
-
- # ResizeAttribute
- def OffsetIsNone(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
- return o == 0
-
- # ResizeAttribute
- def Border(self, j):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
- if o != 0:
- a = self._tab.Vector(o)
- return self._tab.Get(flatbuffers.number_types.Int16Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 2))
- return 0
-
- # ResizeAttribute
- def BorderAsNumpy(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
- if o != 0:
- return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int16Flags, o)
- return 0
-
- # ResizeAttribute
- def BorderLength(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
- if o != 0:
- return self._tab.VectorLen(o)
- return 0
-
- # ResizeAttribute
- def BorderIsNone(self):
- o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
- return o == 0
-
- # ResizeAttribute
def Mode(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
if o != 0:
@@ -122,42 +41,6 @@ def ResizeAttributeStart(builder):
def Start(builder):
ResizeAttributeStart(builder)
-def ResizeAttributeAddScale(builder, scale):
- builder.PrependUOffsetTRelativeSlot(0, flatbuffers.number_types.UOffsetTFlags.py_type(scale), 0)
-
-def AddScale(builder, scale):
- ResizeAttributeAddScale(builder, scale)
-
-def ResizeAttributeStartScaleVector(builder, numElems):
- return builder.StartVector(2, numElems, 2)
-
-def StartScaleVector(builder, numElems: int) -> int:
- return ResizeAttributeStartScaleVector(builder, numElems)
-
-def ResizeAttributeAddOffset(builder, offset):
- builder.PrependUOffsetTRelativeSlot(1, flatbuffers.number_types.UOffsetTFlags.py_type(offset), 0)
-
-def AddOffset(builder, offset):
- ResizeAttributeAddOffset(builder, offset)
-
-def ResizeAttributeStartOffsetVector(builder, numElems):
- return builder.StartVector(2, numElems, 2)
-
-def StartOffsetVector(builder, numElems: int) -> int:
- return ResizeAttributeStartOffsetVector(builder, numElems)
-
-def ResizeAttributeAddBorder(builder, border):
- builder.PrependUOffsetTRelativeSlot(2, flatbuffers.number_types.UOffsetTFlags.py_type(border), 0)
-
-def AddBorder(builder, border):
- ResizeAttributeAddBorder(builder, border)
-
-def ResizeAttributeStartBorderVector(builder, numElems):
- return builder.StartVector(2, numElems, 2)
-
-def StartBorderVector(builder, numElems: int) -> int:
- return ResizeAttributeStartBorderVector(builder, numElems)
-
def ResizeAttributeAddMode(builder, mode):
builder.PrependUint32Slot(3, mode, 0)
diff --git a/python/tosa/TableAttribute.py b/python/tosa/TableAttribute.py
index 6caa1f2..04193fa 100644
--- a/python/tosa/TableAttribute.py
+++ b/python/tosa/TableAttribute.py
@@ -70,7 +70,7 @@ def AddTable(builder, table):
def TableAttributeStartTableVector(builder, numElems):
return builder.StartVector(2, numElems, 2)
-def StartTableVector(builder, numElems: int) -> int:
+def StartTableVector(builder, numElems):
return TableAttributeStartTableVector(builder, numElems)
def TableAttributeEnd(builder):
diff --git a/python/tosa/TosaBasicBlock.py b/python/tosa/TosaBasicBlock.py
index b31f455..30ad0ee 100644
--- a/python/tosa/TosaBasicBlock.py
+++ b/python/tosa/TosaBasicBlock.py
@@ -146,7 +146,7 @@ def AddOperators(builder, operators):
def TosaBasicBlockStartOperatorsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartOperatorsVector(builder, numElems: int) -> int:
+def StartOperatorsVector(builder, numElems):
return TosaBasicBlockStartOperatorsVector(builder, numElems)
def TosaBasicBlockAddTensors(builder, tensors):
@@ -158,7 +158,7 @@ def AddTensors(builder, tensors):
def TosaBasicBlockStartTensorsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartTensorsVector(builder, numElems: int) -> int:
+def StartTensorsVector(builder, numElems):
return TosaBasicBlockStartTensorsVector(builder, numElems)
def TosaBasicBlockAddInputs(builder, inputs):
@@ -170,7 +170,7 @@ def AddInputs(builder, inputs):
def TosaBasicBlockStartInputsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartInputsVector(builder, numElems: int) -> int:
+def StartInputsVector(builder, numElems):
return TosaBasicBlockStartInputsVector(builder, numElems)
def TosaBasicBlockAddOutputs(builder, outputs):
@@ -182,7 +182,7 @@ def AddOutputs(builder, outputs):
def TosaBasicBlockStartOutputsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartOutputsVector(builder, numElems: int) -> int:
+def StartOutputsVector(builder, numElems):
return TosaBasicBlockStartOutputsVector(builder, numElems)
def TosaBasicBlockEnd(builder):
diff --git a/python/tosa/TosaGraph.py b/python/tosa/TosaGraph.py
index 84b51a7..520372b 100644
--- a/python/tosa/TosaGraph.py
+++ b/python/tosa/TosaGraph.py
@@ -85,7 +85,7 @@ def AddRegions(builder, regions):
def TosaGraphStartRegionsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartRegionsVector(builder, numElems: int) -> int:
+def StartRegionsVector(builder, numElems):
return TosaGraphStartRegionsVector(builder, numElems)
def TosaGraphEnd(builder):
diff --git a/python/tosa/TosaOperator.py b/python/tosa/TosaOperator.py
index 2b889ad..19f2d2c 100644
--- a/python/tosa/TosaOperator.py
+++ b/python/tosa/TosaOperator.py
@@ -125,7 +125,7 @@ def AddInputs(builder, inputs):
def TosaOperatorStartInputsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartInputsVector(builder, numElems: int) -> int:
+def StartInputsVector(builder, numElems):
return TosaOperatorStartInputsVector(builder, numElems)
def TosaOperatorAddOutputs(builder, outputs):
@@ -137,7 +137,7 @@ def AddOutputs(builder, outputs):
def TosaOperatorStartOutputsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartOutputsVector(builder, numElems: int) -> int:
+def StartOutputsVector(builder, numElems):
return TosaOperatorStartOutputsVector(builder, numElems)
def TosaOperatorEnd(builder):
diff --git a/python/tosa/TosaRegion.py b/python/tosa/TosaRegion.py
index 7fd6e3c..80829da 100644
--- a/python/tosa/TosaRegion.py
+++ b/python/tosa/TosaRegion.py
@@ -81,7 +81,7 @@ def AddBlocks(builder, blocks):
def TosaRegionStartBlocksVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartBlocksVector(builder, numElems: int) -> int:
+def StartBlocksVector(builder, numElems):
return TosaRegionStartBlocksVector(builder, numElems)
def TosaRegionEnd(builder):
diff --git a/python/tosa/TosaTensor.py b/python/tosa/TosaTensor.py
index 3fb9f86..1311aac 100644
--- a/python/tosa/TosaTensor.py
+++ b/python/tosa/TosaTensor.py
@@ -138,7 +138,7 @@ def AddShape(builder, shape):
def TosaTensorStartShapeVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartShapeVector(builder, numElems: int) -> int:
+def StartShapeVector(builder, numElems):
return TosaTensorStartShapeVector(builder, numElems)
def TosaTensorAddType(builder, type):
@@ -156,7 +156,7 @@ def AddData(builder, data):
def TosaTensorStartDataVector(builder, numElems):
return builder.StartVector(1, numElems, 1)
-def StartDataVector(builder, numElems: int) -> int:
+def StartDataVector(builder, numElems):
return TosaTensorStartDataVector(builder, numElems)
def TosaTensorAddVariable(builder, variable):
diff --git a/python/tosa/TransposeAttribute.py b/python/tosa/TransposeAttribute.py
index 71cfdf0..5aa23e2 100644
--- a/python/tosa/TransposeAttribute.py
+++ b/python/tosa/TransposeAttribute.py
@@ -70,7 +70,7 @@ def AddPerms(builder, perms):
def TransposeAttributeStartPermsVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartPermsVector(builder, numElems: int) -> int:
+def StartPermsVector(builder, numElems):
return TransposeAttributeStartPermsVector(builder, numElems)
def TransposeAttributeEnd(builder):
diff --git a/python/tosa/TransposeConvAttribute.py b/python/tosa/TransposeConvAttribute.py
index 9dedfc4..2f7cdc7 100644
--- a/python/tosa/TransposeConvAttribute.py
+++ b/python/tosa/TransposeConvAttribute.py
@@ -125,7 +125,7 @@ def AddOutPad(builder, outPad):
def TransposeConvAttributeStartOutPadVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartOutPadVector(builder, numElems: int) -> int:
+def StartOutPadVector(builder, numElems):
return TransposeConvAttributeStartOutPadVector(builder, numElems)
def TransposeConvAttributeAddStride(builder, stride):
@@ -137,7 +137,7 @@ def AddStride(builder, stride):
def TransposeConvAttributeStartStrideVector(builder, numElems):
return builder.StartVector(4, numElems, 4)
-def StartStrideVector(builder, numElems: int) -> int:
+def StartStrideVector(builder, numElems):
return TransposeConvAttributeStartStrideVector(builder, numElems)
def TransposeConvAttributeAddInputZp(builder, inputZp):
diff --git a/schema/tosa.fbs b/schema/tosa.fbs
index cad6db7..1a2d952 100644
--- a/schema/tosa.fbs
+++ b/schema/tosa.fbs
@@ -191,9 +191,9 @@ table AxisAttribute {
}
table ResizeAttribute {
- scale: [int16];
- offset: [int16];
- border: [int16];
+ scale: [int16] (deprecated);
+ offset: [int16] (deprecated);
+ border: [int16] (deprecated);
mode: ResizeMode;
}
diff --git a/src/numpy_utils.cpp b/src/numpy_utils.cpp
index e4171d7..7cf5f94 100644
--- a/src/numpy_utils.cpp
+++ b/src/numpy_utils.cpp
@@ -247,6 +247,14 @@ NumpyUtilities::NPError NumpyUtilities::checkNpyHeader(FILE* infile, const uint3
while (isspace(*ptr))
ptr++;
+ // ml_dtypes writes '<f1' for 'numpy.dtype' in the header for float8_e5m2, but
+ // default NumPy does not understand this notation, which causes trouble
+ // when other code tries to open this file.
+ // To avoid this, '|u1' notation is used when the file is written, and the uint8
+ // data is viewed as float8_e5m2 later when the file is read.
+ if (!strcmp(dtype_str, "'<f1'"))
+ dtype_str = "'|u1'";
+
if (strcmp(ptr, dtype_str))
{
return FILE_TYPE_MISMATCH;
@@ -430,6 +438,13 @@ NumpyUtilities::NPError
memcpy(header, NUMPY_HEADER_STR, sizeof(NUMPY_HEADER_STR) - 1);
headerPos += sizeof(NUMPY_HEADER_STR) - 1;
+ // NumPy does not understand float8_e5m2, so change it to uint8 type, so that
+ // Python can read .npy files.
+ if (!strcmp(dtype_str, "'<f1'"))
+ {
+ dtype_str = "'|u1'";
+ }
+
// Output the format dictionary
// Hard-coded for I32 for now
headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos,
@@ -438,7 +453,19 @@ NumpyUtilities::NPError
// Add shape contents (if any - as this will be empty for rank 0)
for (i = 0; i < shape.size(); i++)
{
- headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, " %d,", shape[i]);
+ // Output NumPy file from tosa_refmodel_sut_run generates the shape information
+ // without a trailing comma when the rank is greater than 1.
+ if (i == 0)
+ {
+ if (shape.size() == 1)
+ headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, "%d,", shape[i]);
+ else
+ headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, "%d", shape[i]);
+ }
+ else
+ {
+ headerPos += snprintf(header + headerPos, NUMPY_HEADER_SZ - headerPos, ", %d", shape[i]);
+ }
}
// Close off the dictionary
diff --git a/src/tosa_serialization_handler.cpp b/src/tosa_serialization_handler.cpp
index 0ce6211..74f66d8 100644
--- a/src/tosa_serialization_handler.cpp
+++ b/src/tosa_serialization_handler.cpp
@@ -19,9 +19,6 @@
#include <iostream>
using namespace tosa;
-using fp8e4m3 = ct::cfloat<int8_t, 4, true, true, false>;
-using fp8e5m2 = ct::cfloat<int8_t, 5, true, true, true>;
-
TosaSerializationTensor::TosaSerializationTensor(const flatbuffers::String* name,
const flatbuffers::Vector<int32_t>* shape,
DType dtype,
@@ -750,45 +747,41 @@ void TosaSerializationHandler::ForceAlignTensorData(std::vector<uint8_t>& buf)
}
}
-tosa_err_t TosaSerializationHandler::ConvertBF16toU8(const std::vector<float>& in, std::vector<uint8_t>& out)
+tosa_err_t TosaSerializationHandler::ConvertBF16toU8(const std::vector<bf16>& in, std::vector<uint8_t>& out)
{
// Note: Converts fp32->bf16 by ignoring the least significant 16 bits
out.clear();
for (auto val : in)
{
- uint32_t* val_u32 = reinterpret_cast<uint32_t*>(&val);
- uint8_t f32_byte2 = (*val_u32 >> 16) & 0xFF;
- uint8_t f32_byte3 = (*val_u32 >> 24) & 0xFF;
- // little endian: byte2 followed by byte3
- out.push_back(f32_byte2);
- out.push_back(f32_byte3);
+ uint8_t bf16_byte0 = val.bits() & 0xFF;
+ uint8_t bf16_byte1 = (val.bits() >> 8) & 0xFF;
+ out.push_back(bf16_byte0);
+ out.push_back(bf16_byte1);
}
ForceAlignTensorData(out);
return TOSA_OK;
}
-tosa_err_t TosaSerializationHandler::ConvertFP8E4M3toU8(const std::vector<float>& in, std::vector<uint8_t>& out)
+tosa_err_t TosaSerializationHandler::ConvertFP8E4M3toU8(const std::vector<fp8e4m3>& in, std::vector<uint8_t>& out)
{
// Note: Converts fp32->FP8E4M3 before converting to unint8_t
out.clear();
for (auto val : in)
{
- auto f8 = static_cast<fp8e4m3>(val);
- uint8_t b8 = f8.bits();
+ uint8_t b8 = val.bits();
out.push_back(b8);
}
ForceAlignTensorData(out);
return TOSA_OK;
}
-tosa_err_t TosaSerializationHandler::ConvertFP8E5M2toU8(const std::vector<float>& in, std::vector<uint8_t>& out)
+tosa_err_t TosaSerializationHandler::ConvertFP8E5M2toU8(const std::vector<fp8e5m2>& in, std::vector<uint8_t>& out)
{
// Note: Converts fp32->FP8E5M2 before converting to uint8_t
out.clear();
for (auto val : in)
{
- auto f8 = static_cast<fp8e5m2>(val);
- uint8_t b8 = f8.bits();
+ uint8_t b8 = val.bits();
out.push_back(b8);
}
ForceAlignTensorData(out);
@@ -944,11 +937,9 @@ tosa_err_t TosaSerializationHandler::ConvertBooltoU8(const std::vector<bool>& in
return TOSA_OK;
}
-tosa_err_t TosaSerializationHandler::ConvertU8toBF16(const std::vector<uint8_t>& in,
- uint32_t out_size,
- std::vector<float>& out)
+tosa_err_t
+ TosaSerializationHandler::ConvertU8toBF16(const std::vector<uint8_t>& in, uint32_t out_size, std::vector<bf16>& out)
{
- // Note: bf16 values returned in fp32 type
out.clear();
if (in.size() < out_size * sizeof(int16_t))
{
@@ -959,22 +950,21 @@ tosa_err_t TosaSerializationHandler::ConvertU8toBF16(const std::vector<uint8_t>&
for (uint32_t i = 0; i < out_size; i++)
{
- uint32_t f32_byte2 = in[i * sizeof(int16_t)];
- uint32_t f32_byte3 = in[i * sizeof(int16_t) + 1];
- uint32_t val_u32 = (f32_byte2 << 16) + (f32_byte3 << 24);
+ uint8_t bf16_byte0 = in[i * sizeof(int16_t)];
+ uint8_t bf16_byte1 = in[i * sizeof(int16_t) + 1];
+ uint16_t val_u16 = (bf16_byte0) + (bf16_byte1 << 8);
- // Reinterpret u32 bytes as fp32
- float val_f32 = *(float*)&val_u32;
- out.push_back(val_f32);
+ // Reinterpret u16 bytes as bf16
+ bf16 val_bf16 = *(bf16*)&val_u16;
+ out.push_back(val_bf16);
}
return TOSA_OK;
}
tosa_err_t TosaSerializationHandler::ConvertU8toFP8E4M3(const std::vector<uint8_t>& in,
uint32_t out_size,
- std::vector<float>& out)
+ std::vector<fp8e4m3>& out)
{
- // Note: FP8E4M3 values returned in fp32 type
out.clear();
if (in.size() < out_size * sizeof(int8_t))
{
@@ -985,17 +975,16 @@ tosa_err_t TosaSerializationHandler::ConvertU8toFP8E4M3(const std::vector<uint8_
for (uint32_t i = 0; i < out_size; i++)
{
- int8_t bits = static_cast<int8_t>(in[i * sizeof(int8_t)]);
- auto f8 = fp8e4m3::from_bits(bits);
- float val_f32 = static_cast<float>(f8);
- out.push_back(val_f32);
+ int8_t bits = static_cast<int8_t>(in[i * sizeof(int8_t)]);
+ auto f8 = fp8e4m3::from_bits(bits);
+ out.push_back(f8);
}
return TOSA_OK;
}
tosa_err_t TosaSerializationHandler::ConvertU8toFP8E5M2(const std::vector<uint8_t>& in,
uint32_t out_size,
- std::vector<float>& out)
+ std::vector<fp8e5m2>& out)
{
// Note: FP8E5M2 values returned in fp32 type
out.clear();
@@ -1008,10 +997,9 @@ tosa_err_t TosaSerializationHandler::ConvertU8toFP8E5M2(const std::vector<uint8_
for (uint32_t i = 0; i < out_size; i++)
{
- int8_t bits = static_cast<int8_t>(in[i * sizeof(int8_t)]);
- auto f8 = fp8e5m2::from_bits(bits);
- float val_f32 = static_cast<float>(f8);
- out.push_back(val_f32);
+ int8_t bits = static_cast<int8_t>(in[i * sizeof(int8_t)]);
+ auto f8 = fp8e5m2::from_bits(bits);
+ out.push_back(f8);
}
return TOSA_OK;
}
@@ -1031,9 +1019,9 @@ tosa_err_t TosaSerializationHandler::ConvertU8toF16(const std::vector<uint8_t>&
for (uint32_t i = 0; i < out_size; i++)
{
- uint16_t f16_byte0 = in[i * sizeof(int16_t)];
- uint16_t f16_byte1 = in[i * sizeof(int16_t) + 1];
- uint16_t val_u16 = f16_byte0 + (f16_byte1 << 8);
+ uint8_t f16_byte0 = in[i * sizeof(int16_t)];
+ uint8_t f16_byte1 = in[i * sizeof(int16_t) + 1];
+ uint16_t val_u16 = f16_byte0 + (f16_byte1 << 8);
// Reinterpret u16 byte as fp16 then convert to fp32
half_float::half val_f16 = *(half_float::half*)&val_u16;
diff --git a/third_party/flatbuffers b/third_party/flatbuffers
-Subproject 0100f6a5779831fa7a651e4b67ef389a8752bd9
+Subproject 6ff9e90e7e399f3977e99a315856b57c8afe5b4