From 6103155b3a2a555c3fc4a3a2173b35ea573c9600 Mon Sep 17 00:00:00 2001 From: Tai Ly Date: Wed, 13 Mar 2024 19:19:53 +0000 Subject: [tosa_mlir_translator] Fix fp16, bf16 and fp8 serialization Fix serialization and deserialization of fp16, bf16 and fp8 for pad_const, clamp min_val/max_val, and const values Signed-off-by: Tai Ly Change-Id: Ia39a17d2f395584d5555d2c86cdae7113cf14e3f --- src/TosaSerialize.cpp | 270 +++++++++++++++++++++++++++++--------------------- 1 file changed, 157 insertions(+), 113 deletions(-) (limited to 'src/TosaSerialize.cpp') diff --git a/src/TosaSerialize.cpp b/src/TosaSerialize.cpp index 875303e..6553944 100644 --- a/src/TosaSerialize.cpp +++ b/src/TosaSerialize.cpp @@ -152,9 +152,29 @@ public: TosaSerializationHandler *GetTsh() const; TosaSerializationRegionBuilder *GetRegionBuilder() const; mlir::LogicalResult GetDataFromAttribute(mlir::Operation &op, - mlir::Attribute &attr, DType dtype, + mlir::Attribute &attr, + mlir::Type element_type, std::vector &u8_data) const; + // populate u8_data with either int64_value or float_value depending on + // element_type + mlir::LogicalResult + GetU8DataFromIntOrFloatValue(int64_t int64_value, float fp_value, + mlir::Type element_type, + std::vector &u8_data) const; + + // populate u8_data with int_value depending on non-float element_type + mlir::LogicalResult + GetU8DataFromIntValues(const std::vector &int_values, + mlir::Type element_type, + std::vector &u8_data) const; + + // populate u8_data with fp_value depending on float element_type + mlir::LogicalResult + GetU8DataFromFloatValues(const std::vector &fp_values, + mlir::Type element_type, + std::vector &u8_data) const; + private: std::string GetTensorName(mlir::Value val) const; std::string GetVariableTensorName(mlir::Operation *op) const; @@ -303,134 +323,146 @@ std::string TosaSerializationOperatorBuilder::GetVariableTensorName( } mlir::LogicalResult TosaSerializationOperatorBuilder::GetDataFromAttribute( - mlir::Operation &op, mlir::Attribute &attr, DType type, + mlir::Operation &op, mlir::Attribute &attr, mlir::Type element_type, std::vector &u8_data) const { + if (!element_type.isIntOrFloat()) { + return mlir::failure(); + } auto dense_attr = attr.dyn_cast(); - switch (type) { - case DType_FP32: - case DType_BF16: - case DType_FP16: - case DType_FP8E4M3: - case DType_FP8E5M2: { - std::vector data; + // handle float types + if (element_type.isa()) { + std::vector fp_data; auto val_attr = attr.dyn_cast(); if (dense_attr) { for (auto val : dense_attr.getValues()) { - data.push_back(val.convertToFloat()); + fp_data.push_back(val.convertToFloat()); } } else if (val_attr) { - data.push_back((float)val_attr.getValueAsDouble()); + fp_data.push_back((float)val_attr.getValueAsDouble()); } else { op.emitOpError("Unknown const attribute"); return mlir::failure(); } - if (type == DType_FP16) { - TosaSerializationHandler::ConvertF16toU8(data, u8_data); - } else { - // for all other floating types, store as F32 values - TosaSerializationHandler::ConvertF32toU8(data, u8_data); - } - break; + return GetU8DataFromFloatValues(fp_data, element_type, u8_data); } - case DType_INT8: { - std::vector data; - auto val_attr = attr.dyn_cast(); - if (dense_attr) { - for (auto val : dense_attr.getValues()) { - data.push_back(val); - } - } else if (val_attr) { - data.push_back(val_attr.getInt()); - } else { - op.emitOpError("Unknown const attribute"); - return mlir::failure(); + // element_type is integer type + + bool isInt48 = element_type.isInteger(48); + std::vector i64_data; + + auto val_attr = attr.dyn_cast(); + if (dense_attr) { + for (auto valueIt : dense_attr.getValues()) { + int64_t val = isInt48 ? static_cast(valueIt.getLimitedValue()) + : valueIt.getSExtValue(); + i64_data.push_back(val); } - TosaSerializationHandler::ConvertI8toU8(data, u8_data); - break; + } else if (val_attr) { + i64_data.push_back(val_attr.getInt()); + } else { + op.emitOpError("Unknown const attribute"); + return mlir::failure(); } - case DType_INT16: { - std::vector data; - auto val_attr = attr.dyn_cast(); - if (dense_attr) { - for (auto val : dense_attr.getValues()) { - data.push_back(val); - } - } else if (val_attr) { - data.push_back(val_attr.getInt()); - } else { - op.emitOpError("Unknown const attribute"); - return mlir::failure(); + return GetU8DataFromIntValues(i64_data, element_type, u8_data); +} + +mlir::LogicalResult TosaSerializationOperatorBuilder::GetU8DataFromIntValues( + const std::vector &int64_values, mlir::Type element_type, + std::vector &u8_data) const { + switch (element_type.getIntOrFloatBitWidth()) { + case 1: { + // bool use bool vec + std::vector bool_values; + for (auto v : int64_values) { + bool bool_value = v == 0 ? false : true; + bool_values.push_back(bool_value); } - TosaSerializationHandler::ConvertI16toU8(data, u8_data); + TosaSerializationHandler::ConvertBooltoU8(bool_values, u8_data); break; } - case DType_INT32: { - std::vector data; - auto val_attr = attr.dyn_cast(); - - if (dense_attr) { - for (auto val : dense_attr.getValues()) { - data.push_back(val); - } - } else if (val_attr) { - data.push_back(val_attr.getInt()); + case 4: + case 8: { + // I4 and I8 use int8_t vec + std::vector i8_values; + for (auto v : int64_values) { + i8_values.push_back(static_cast(v)); + } + if (element_type.isInteger(4)) { + TosaSerializationHandler::ConvertI4toU8(i8_values, u8_data); } else { - op.emitOpError("Unknown const attribute"); - return mlir::failure(); + TosaSerializationHandler::ConvertI8toU8(i8_values, u8_data); } - TosaSerializationHandler::ConvertI32toU8(data, u8_data); break; } - case DType_INT48: { - std::vector data; - auto val_attr = attr.dyn_cast(); - - if (dense_attr) { - for (auto valueIt : dense_attr.getValues()) { - uint64_t val = valueIt.getLimitedValue(); - data.push_back(val); - } - } else if (val_attr) { - data.push_back(val_attr.getInt()); - } else { - op.emitOpError("Unknown const attribute"); - return mlir::failure(); + case 16: { + // I16 use int16_t vec + std::vector i16_values; + for (auto v : int64_values) { + i16_values.push_back(static_cast(v)); } - TosaSerializationHandler::ConvertI48toU8(data, u8_data); + TosaSerializationHandler::ConvertI16toU8(i16_values, u8_data); break; } - case DType_BOOL: { - std::vector data; - auto val_attr = attr.dyn_cast(); - - if (dense_attr) { - for (auto val : dense_attr.getValues()) { - data.push_back(val); - } - } else if (val_attr) { - data.push_back(val_attr.getValue()); - } else { - op.emitOpError("Unknown const attribute"); - return mlir::failure(); + case 32: { + // I32 use int32_t vec + std::vector i32_values; + for (auto v : int64_values) { + i32_values.push_back(static_cast(v)); } - - TosaSerializationHandler::ConvertBooltoU8(data, u8_data); + TosaSerializationHandler::ConvertI32toU8(i32_values, u8_data); + break; + } + case 48: { + // I48 use int64_t vec + TosaSerializationHandler::ConvertI48toU8(int64_values, u8_data); break; } default: { - op.emitOpError("Unknown element type of const attribute"); + // unsupported bit widths return mlir::failure(); } } + return mlir::success(); +} +mlir::LogicalResult TosaSerializationOperatorBuilder::GetU8DataFromFloatValues( + const std::vector &fp_values, mlir::Type element_type, + std::vector &u8_data) const { + assert( + element_type + .isa()); // this should only be called for float type + if (element_type.isF16()) { + TosaSerializationHandler::ConvertF16toU8(fp_values, u8_data); + } else if (element_type.isBF16()) { + TosaSerializationHandler::ConvertBF16toU8(fp_values, u8_data); + } else if (element_type.isFloat8E4M3FN()) { + TosaSerializationHandler::ConvertFP8E4M3toU8(fp_values, u8_data); + } else if (element_type.isFloat8E5M2()) { + TosaSerializationHandler::ConvertFP8E5M2toU8(fp_values, u8_data); + } else if (element_type.isF32()) { + TosaSerializationHandler::ConvertF32toU8(fp_values, u8_data); + } else { + return mlir::failure(); + } return mlir::success(); } +mlir::LogicalResult +TosaSerializationOperatorBuilder::GetU8DataFromIntOrFloatValue( + int64_t int64_value, float fp_value, mlir::Type element_type, + std::vector &u8_data) const { + if (element_type.isa()) { + return GetU8DataFromFloatValues({fp_value}, element_type, u8_data); + } else { + return GetU8DataFromIntValues({int64_value}, element_type, u8_data); + } +} + // Main template to catch unimplemented translation. template TosaSerializationOperator * @@ -691,9 +723,12 @@ TosaSerializationOperatorBuilder::build( } std::vector u8_data; mlir::Attribute attr = op.getAttr(llvm::StringRef("value")); - DType type = ts->GetDtype(); + mlir::Type element_type = + llvm::cast(op.getResult(0).getType()).getElementType(); - if (GetDataFromAttribute(op, attr, type, u8_data).failed()) { + if (GetDataFromAttribute(op, attr, element_type, u8_data).failed()) { + op.emitOpError("ERROR: GetDataFromAttribute() fails when building value of " + "const tensor"); return nullptr; } @@ -977,26 +1012,32 @@ TosaSerializationOperatorBuilder::build( } std::vector min_val, max_val; + float min_fp, max_fp; + int64_t min_int, max_int; + if (input_element_type.isa()) { - auto min_fp = + min_fp = mlir::cast(min_val_attr).getValue().convertToFloat(); - auto max_fp = + max_fp = mlir::cast(max_val_attr).getValue().convertToFloat(); - TosaSerializationHandler::ConvertF32toU8({min_fp}, min_val); - TosaSerializationHandler::ConvertF32toU8({max_fp}, max_val); + min_int = max_int = 0; } else { - int32_t min_int = 0; - int32_t max_int = 0; - if (input_element_type.isUnsignedInteger()) { - min_int = mlir::cast(min_val_attr).getUInt(); - max_int = mlir::cast(max_val_attr).getUInt(); - } else { - assert(input_element_type.isa()); - min_int = mlir::cast(min_val_attr).getInt(); - max_int = mlir::cast(max_val_attr).getInt(); - } - TosaSerializationHandler::ConvertI32toU8({min_int}, min_val); - TosaSerializationHandler::ConvertI32toU8({max_int}, max_val); + assert(input_element_type.isa()); + min_int = mlir::cast(min_val_attr).getInt(); + max_int = mlir::cast(max_val_attr).getInt(); + min_fp = max_fp = 0.f; + } + + if (GetU8DataFromIntOrFloatValue(min_int, min_fp, input_element_type, min_val) + .failed()) { + op.emitOpError("Failed to serialize min value"); + return nullptr; + } + + if (GetU8DataFromIntOrFloatValue(max_int, max_fp, input_element_type, max_val) + .failed()) { + op.emitOpError("Failed to serialize max value"); + return nullptr; } std::string input_name = GetTensorName(op.getOperand(0)); @@ -1150,11 +1191,14 @@ TosaSerializationOperatorBuilder::build( std::vector pad_const; mlir::Type input_element_type = llvm::cast(op.getOperand(0).getType()).getElementType(); - if (input_element_type.isa()) { - TosaSerializationHandler::ConvertF32toU8({pad_const_fp}, pad_const); - } else { - TosaSerializationHandler::ConvertI32toU8({pad_const_int}, pad_const); + + if (GetU8DataFromIntOrFloatValue(pad_const_int, pad_const_fp, + input_element_type, pad_const) + .failed()) { + op.emitOpError("Failed to serialize pad_const value"); + return nullptr; } + TosaPadAttribute attribute(pad_const); TosaSerializationOperator *tyop = new TosaSerializationOperator( @@ -1806,11 +1850,11 @@ mlir::LogicalResult TosaSerializationBlockBuilder::BuildAllOpsInBlock( // zeros mlir::Attribute initial_value = op->getAttr("initial_value"); std::vector u8_data; - DType element_type = Type2DType(tensor_type.getElementType()); if (initial_value) { if (initial_value.isa()) { if (op_builder - .GetDataFromAttribute(*op, initial_value, element_type, u8_data) + .GetDataFromAttribute(*op, initial_value, + tensor_type.getElementType(), u8_data) .failed()) { llvm::errs() << "ERROR: GetDataFromAttribute() fails when building " "initial_value of variable tensor\n"; -- cgit v1.2.1