From c70ba7ed73fc89fb50747d6683d662394fc277ad Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Fri, 17 Nov 2023 14:28:36 +0100
Subject: [PATCH] Support fp8_e4m3/fp8_e5m2

---
 README.md                 | 8 +++++---
 safetensors/src/tensor.rs | 8 ++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 240ed6dd..6280ce94 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,9 @@ Notes:
   from traditional tensor libraries perspective (torch, tensorflow, numpy, ..).
 - 0-rank Tensors (tensors with shape `[]`) are allowed, they are merely a scalar.
 - The byte buffer needs to be entirely indexed, and cannot contain holes. This prevents
-  the creation of polyglot files.
+- Endianness: Little-endian.
+  moment.
+- Order: 'C' or row-major.
 
 ### Yet another format ?
 
@@ -113,7 +115,7 @@ formats.
 Let's take a look at alternatives and why this format is deemed interesting.
 This is my very personal and probably biased view:
 
-| Format                  | Safe | Zero-copy | Lazy loading | No file size limit | Layout control | Flexibility | Bfloat16
+| Format                  | Safe | Zero-copy | Lazy loading | No file size limit | Layout control | Flexibility | Bfloat16/Fp8
 | ----------------------- | --- | --- | --- | --- | --- | --- | --- |
 | pickle (PyTorch)        | ✗ | ✗ | ✗ | 🗸 | ✗ | 🗸 | 🗸 |
 | H5 (Tensorflow)         | 🗸 | ✗ | 🗸 | 🗸 | ~ | ~ | ✗ |
@@ -133,7 +135,7 @@ some tensors in it without scanning the whole file (distributed setting) ?
 - Layout control: Lazy loading, is not necessarily enough since if the information about tensors is spread out in
   your file, then even if the information is lazily accessible you might have to access most of your file to read
   the available tensors (incurring many DISK -> RAM copies). Controlling the layout to keep fast access to single tensors is important.
 - No file size limit: Is there a limit to the file size ?
 - Flexibility: Can I save custom code in the format and be able to use it later with zero extra code ? (~ means we can store more than pure tensors, but no custom code)
-- Bfloat16: Does the format support native bfloat16 (meaning no weird workarounds are
+- Bfloat16/Fp8: Does the format support native bfloat16/fp8 (meaning no weird workarounds are
   necessary)? This is becoming increasingly important in the ML world.
diff --git a/safetensors/src/tensor.rs b/safetensors/src/tensor.rs
index f326f21b..0352b82a 100644
--- a/safetensors/src/tensor.rs
+++ b/safetensors/src/tensor.rs
@@ -641,6 +641,12 @@ pub enum Dtype {
     U8,
     /// Signed byte
     I8,
+    /// FP8 <https://arxiv.org/pdf/2209.05433.pdf>_
+    #[allow(non_camel_case_types)]
+    F8_E5M2,
+    /// FP8 <https://arxiv.org/pdf/2209.05433.pdf>_
+    #[allow(non_camel_case_types)]
+    F8_E4M3,
     /// Signed integer (16-bit)
     I16,
     /// Unsigned integer (16-bit)
@@ -670,6 +676,8 @@ impl Dtype {
             Dtype::BOOL => 1,
             Dtype::U8 => 1,
             Dtype::I8 => 1,
+            Dtype::F8_E5M2 => 1,
+            Dtype::F8_E4M3 => 1,
             Dtype::I16 => 2,
             Dtype::U16 => 2,
             Dtype::I32 => 4,
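A minimal usage sketch (not part of the patch) of how the two new variants could be exercised end to end through the crate's public API. It assumes the per-element byte-size helper patched in the second hunk is `Dtype::size()`, and that `serialize` in this revision of `tensor.rs` takes a `&Option<HashMap<String, String>>` metadata argument; the tensor name `"w"` and the raw byte values are made up for illustration. Both FP8 variants (E4M3: 4 exponent / 3 mantissa bits, E5M2: 5 exponent / 2 mantissa bits) occupy a single byte, which is all the new `size()` arms need to encode.

```rust
use std::collections::HashMap;

use safetensors::tensor::{serialize, Dtype, SafeTensorError, SafeTensors, TensorView};

fn main() -> Result<(), SafeTensorError> {
    // Both FP8 variants are one byte per element, matching the new match arms.
    assert_eq!(Dtype::F8_E4M3.size(), 1);
    assert_eq!(Dtype::F8_E5M2.size(), 1);

    // Four arbitrary FP8 bit patterns for a 2x2 tensor (2 * 2 * 1 byte = 4 bytes).
    let raw: Vec<u8> = vec![0x00, 0x38, 0x40, 0x44];
    let shape: Vec<usize> = vec![2, 2];
    let view = TensorView::new(Dtype::F8_E4M3, shape.clone(), &raw)?;

    // Serialize a single hypothetical tensor named "w", then read it back.
    let mut tensors: HashMap<String, TensorView> = HashMap::new();
    tensors.insert("w".to_string(), view);
    let bytes = serialize(tensors, &None)?;

    let loaded = SafeTensors::deserialize(&bytes)?;
    let w = loaded.tensor("w")?;
    assert_eq!(w.dtype(), Dtype::F8_E4M3);
    assert_eq!(w.shape(), shape.as_slice());
    assert_eq!(w.data(), raw.as_slice());
    Ok(())
}
```

The `#[allow(non_camel_case_types)]` on the variants presumably keeps the Rust identifiers identical to the dtype strings ("F8_E4M3", "F8_E5M2") written into the JSON header, matching the naming of the existing variants.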