rfct(pff2): Small fixes and refactors

max-ishere · Feb 1, 2024 · f4d8e9b · f4d8e9b
1 parent 0f7ced4
commit f4d8e9b
Show file tree

Hide file tree

Showing 6 changed files with 114 additions and 105 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -8,6 +8,7 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0.79"
 clap = { version = "4.4.18", features = ["derive"] }
+log = "0.4.20"
 nom = "7.1.3"
 thiserror = "1.0.56"
 

diff --git a/examples/pff2.rs b/examples/pff2.rs
@@ -41,15 +41,13 @@ fn render_glyphs(glyphs: &[Glyph], ascent: u16, descent: u16) {
 fn render_glyph(glyph: &Glyph) {
     if glyph.height == 0 || glyph.width == 0 {
         println!(
-            r" 0   {:8x}  {:8}
+            r" 0   {:-8}  {:-8}
 0  |
   0x0
 
 ",
             glyph.code,
-            char::from_u32(glyph.code)
-                .map(|c| format!(r#""{}""#, c))
-                .unwrap_or(r#""????""#.to_string()),
+            glyph.code.escape_unicode(),
         );
 
         return;
@@ -64,12 +62,10 @@ fn render_glyph(glyph: &Glyph) {
     const TRANSPARENT: Option<bool> = Some(Glyph::TRANSPARENT_PIXEL);
 
     println!(
-        "  {}   {:8x}  {:8}",
+        "  {}   {:-8} | {:-8}",
         (0..glyph.width).fold(String::new(), |acc, i| format!("{acc}{}", i % 8)),
+        glyph.code.escape_unicode(),
         glyph.code,
-        char::from_u32(glyph.code)
-            .map(|c| format!(r#""{}""#, c))
-            .unwrap_or(r#""????""#.to_string()),
     );
 
     let mut bytes = glyph.bitmap.iter().enumerate();
@@ -124,9 +120,9 @@ fn render_glyph(glyph: &Glyph) {
 
 fn print_glyph(glyph: &Glyph, ascent: u16, descent: u16) {
     println!(
-        "{c:2} U+{u:04x} | {w:2}w {h:2}h | {dx:3}dx {dy:3}dy | {W:2}W",
-        c = char::from_u32(glyph.code).unwrap_or('?'),
-        u = glyph.code,
+        "{c:2} {u} | {w:2}w {h:2}h | {dx:3}dx {dy:3}dy | {W:2}W",
+        c = glyph.code,
+        u = glyph.code.escape_unicode(),
         w = glyph.width,
         h = glyph.height,
         dx = glyph.x_offset,

diff --git a/src/lib.rs b/src/lib.rs
@@ -4,6 +4,9 @@ use std::rc::Rc;
 #[macro_use]
 extern crate test_case;
 
+#[macro_use]
+extern crate log;
+
 extern crate thiserror;
 
 pub mod parser {

diff --git a/src/parser/pff2.rs b/src/parser/pff2.rs
@@ -29,9 +29,6 @@ pub type Font = Pff2<Validated>;
 ///  on the requiements.
 pub type Parser = Pff2<Unchecked>;
 
-/// The internal representation of the UTF code point.
-type Codepoint = u32;
-
 /// The PFF2 font.
 ///
 /// Only contains relevant to GRUB metadata about the font as well as the glyph list.
@@ -66,7 +63,7 @@ pub struct Pff2<T: FontValidation> {
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Glyph {
     /// The UTF codepoint of the character
-    pub code: Codepoint,
+    pub code: char,
 
     // TODO: document these params
     pub width: usize,
@@ -84,11 +81,9 @@ impl Parser {
 
     /// Constructs [`Self`] from a PFF2 buffer.
     pub fn parse(input: &[u8]) -> Result<Self, ParserError> {
-        let input_for_data_section = input; // Save this because data offsets are absolute
+        let input_for_data_section = input; // Save this because SectionName::CharIndex offsets are absolute
 
-        let (magic, mut input) = input.split_at(4 + 4 + 4);
-        // This is technically a section, but because its always first and same content
-        // we just compare it in one go.
+        let (magic, mut input) = input.split_at(Self::MAGIC.len());
         if magic != Self::MAGIC {
             return Err(ParserError::BadMagicBytes);
         }
@@ -102,22 +97,23 @@ impl Parser {
                 let (section, length, input) = Self::parse_section_header(input)?;
 
                 let Ok(section) = SectionName::try_from(section) else {
+                    warn!("Skipping section {section:?} because it is not supported");
                     break 'input &input[length..];
                 };
 
                 use SectionName::*;
                 match section {
                     FontName => font.name = Self::parse_string(&input[..length])?,
                     Family => font.family = Self::parse_string(&input[..length])?,
-                    PointSize => font.point_size = Self::parse_u16(&input[..length])?,
+                    PointSize => font.point_size = Self::parse_u16_be(&input[..length])?,
                     Weight => font.weight = Self::parse_string(&input[..length])?,
-                    MaxCharWidth => font.max_char_width = Self::parse_u16(&input[..length])?,
-                    MaxCharHeight => font.max_char_height = Self::parse_u16(&input[..length])?,
-                    Ascent => font.ascent = Self::parse_u16(&input[..length])?,
-                    Descent => font.descent = Self::parse_u16(&input[..length])?,
+                    MaxCharWidth => font.max_char_width = Self::parse_u16_be(&input[..length])?,
+                    MaxCharHeight => font.max_char_height = Self::parse_u16_be(&input[..length])?,
+                    Ascent => font.ascent = Self::parse_u16_be(&input[..length])?,
+                    Descent => font.descent = Self::parse_u16_be(&input[..length])?,
                     CharIndex => char_indexes = Self::parse_char_indexes(&input[..length])?,
                     Data => {
-                        font.glyphs = Self::parse_data_section(char_indexes, input_for_data_section)?;
+                        font.glyphs = Self::parse_data_section(char_indexes, input_for_data_section);
                         break 'parsing;
                     }
                 }
@@ -133,15 +129,19 @@ impl Parser {
         Ok(font)
     }
 
-    /// Returns the section name, length as usize and the rest of the supplied input.
+    /// Returns the section name, length as usize and the rest of the supplied input starting at the beginning of
+    /// section content.
     fn parse_section_header(input: &[u8]) -> Result<([u8; 4], usize, &[u8]), ParserError> {
         let (section, input) = input.split_at(4);
         let section: [u8; 4] = section.try_into().map_err(|_| ParserError::InsufficientHeaderBytes)?;
 
         let (length, input) = input.split_at(4);
-
-        let length =
-            u32::from_be_bytes(length.try_into().map_err(|_| ParserError::InsufficientHeaderBytes)?).to_usize();
+        let length = u32::from_be_bytes(
+            length
+                .try_into()
+                .map_err(|_| ParserError::InsufficientLengthBytes { section })?,
+        )
+        .to_usize();
 
         Ok((section, length, input))
     }
@@ -160,83 +160,53 @@ impl Parser {
         String::from_utf8(input[..input.len()].to_vec())
     }
 
-    /// Converts the entirety of input into a u16. If the supplied slice is not 2b long, returns an error.
-    fn parse_u16(input: &[u8]) -> Result<u16, ParserError> {
+    /// Converts the entirety of input into a u16. If the supplied slice is not 2 bytes long, returns an error.
+    fn parse_u16_be(input: &[u8]) -> Result<u16, ParserError> {
         if input.len() != 2 {
             return Err(ParserError::InvalidU16Length(input.len()));
         }
 
         Ok(u16::from_be_bytes([input[0], input[1]]))
     }
 
-    /// Validates [`Self`] to be a valid font that can be used for rendering. See [`FontValidationError`] for reasons a
-    /// font may be invalid.
-    pub fn validate(self) -> Result<Font, FontValidationError> {
-        use FontValidationError::*;
-
-        if self.name.is_empty() {
-            return Err(EmptyName);
-        }
-
-        for (prop, err) in [
-            (self.max_char_width, ZeroMaxCharWidth),
-            (self.max_char_height, ZeroMaxCharHeight),
-            (self.ascent, ZeroAscent),
-            (self.descent, ZeroDescent),
-        ] {
-            if prop == 0 {
-                return Err(err);
-            }
-        }
-
-        if self.glyphs.len() == 0 {
-            return Err(NoGlyphs);
-        }
-
-        Ok(Font {
-            name: self.name,
-            family: self.family,
-            point_size: self.point_size,
-            weight: self.weight,
-            max_char_width: self.max_char_width,
-            max_char_height: self.max_char_height,
-            ascent: self.ascent,
-            descent: self.descent,
-            leading: self.leading,
-            glyphs: self.glyphs,
-            _validation: PhantomData,
-        })
-    }
-
     /// Parses the `CHIX` section and returns the glyph lookup table. Errors out if the character index is not alligned
-    /// properly for reading (length doesnt divide perfectly by allignment)
+    /// properly for reading (length doesn't divide evenly by the size of an entry)
     fn parse_char_indexes(input: &[u8]) -> Result<Vec<CharIndex>, ParserError> {
-        const ALLIGNMENT: usize = 4 + 1 + 4;
+        const ENTRY_SIZE: usize = 4 + 1 + 4;
 
-        if input.len() % ALLIGNMENT != 0 {
+        if input.len() % ENTRY_SIZE != 0 {
             return Err(ParserError::InvalidCharacterIndex);
         }
 
-        Ok(input
-            .chunks(ALLIGNMENT)
-            .map(|chunk| CharIndex {
-                code: u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]),
-                // skipp [4], it's a `storage_flags`, and GRUB never uses that field anyway
-                offset: u32::from_be_bytes([chunk[5], chunk[6], chunk[7], chunk[8]]).to_usize(),
+        input
+            .chunks(ENTRY_SIZE)
+            .map(|chunk| {
+                let codepoint = u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
+
+                Ok::<_, ParserError>(CharIndex {
+                    code: char::from_u32(codepoint).ok_or(ParserError::InvalidCodepoint(codepoint))?,
+                    //code: u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]),
+                    // skip [4]: it's the `storage_flags` byte, and GRUB never uses that field anyway
+                    offset: u32::from_be_bytes([chunk[5], chunk[6], chunk[7], chunk[8]]).to_usize(),
+                })
             })
-            .collect())
+            .collect()
     }
 
     /// Takes the glyph lookup section and combines it with the content of the data section to get the complete glyph
-    /// data.
-    fn parse_data_section(indexes: Vec<CharIndex>, input: &[u8]) -> Result<Rc<[Glyph]>, ParserError> {
-        let mut glyphs = Vec::with_capacity(input.len());
+    /// data. [`CharIndex`] offsets are file-global (absolute), so `input` should be the entirety of the file.
+    fn parse_data_section(indexes: Vec<CharIndex>, input: &[u8]) -> Rc<[Glyph]> {
+        let mut glyphs = Vec::with_capacity(indexes.len());
 
         for index in indexes {
             let offset = index.offset;
 
-            // make sure there are enough bytes to read glyph data
+            // make sure there are enough bytes to read the bitmap dimensions
             if offset + 4 > input.len() {
+                warn!(
+                    "Insufficient data to load a glyph for codepoint {}",
+                    index.code.escape_unicode(),
+                );
                 continue;
             }
 
@@ -245,7 +215,12 @@ impl Parser {
 
             let bitmap_len = (width * height + 7) / 8;
 
-            if offset + 12 + bitmap_len > input.len() {
+            // make sure there are enough bytes to read the bitmap and the rest of the fields
+            if offset + 10 + bitmap_len > input.len() {
+                warn!(
+                    "Insufficient data to load a glyph for codepoint {}",
+                    index.code.escape_unicode()
+                );
                 continue;
             }
 
@@ -260,13 +235,48 @@ impl Parser {
             };
 
             glyphs.push(glyph);
+        }
+
+        glyphs.into()
+    }
+
+    /// Validates [`Self`] to be a valid font that can be used for rendering. See [`FontValidationError`] for reasons a
+    /// font may be invalid.
+    pub fn validate(self) -> Result<Font, FontValidationError> {
+        use FontValidationError::*;
+
+        if self.name.is_empty() {
+            return Err(EmptyName);
+        }
 
-            if index.code == 0x21 {
-                // dbg!([input[offset + 10], input[offset + 11]]);
+        for (prop, err) in [
+            (self.max_char_width, ZeroMaxCharWidth),
+            (self.max_char_height, ZeroMaxCharHeight),
+            (self.ascent, ZeroAscent),
+            (self.descent, ZeroDescent),
+        ] {
+            if prop == 0 {
+                return Err(err);
             }
         }
 
-        Ok(Rc::from(glyphs.as_slice()))
+        if self.glyphs.len() == 0 {
+            return Err(NoGlyphs);
+        }
+
+        Ok(Font {
+            name: self.name,
+            family: self.family,
+            point_size: self.point_size,
+            weight: self.weight,
+            max_char_width: self.max_char_width,
+            max_char_height: self.max_char_height,
+            ascent: self.ascent,
+            descent: self.descent,
+            leading: self.leading,
+            glyphs: self.glyphs,
+            _validation: PhantomData,
+        })
     }
 }
 
@@ -286,7 +296,9 @@ enum SectionName {
 
 /// An intermediate structure used for reading glyphs from a font file. This is discarded after the glyphs are read.
 struct CharIndex {
-    pub code: Codepoint,
+    /// The UCS-4 codepoint
+    pub code: char,
+    /// A file-level (absolute) offset to the glyph data
     pub offset: usize,
 }
 
@@ -302,8 +314,8 @@ pub enum ParserError {
     InsufficientHeaderBytes,
 
     /// Expected to be able to read 4 bytes as a u32 length of the section
-    #[error("Insufficient section length bytes")]
-    InsufficientLengthBytes,
+    #[error("Insufficient bytes to read the length of section {section:?}")]
+    InsufficientLengthBytes { section: [u8; 4] },
 
     /// String stored in a section had illegal UTF-8 bytes
     #[error("Invalid UTF-8 string: {0}")]
@@ -316,6 +328,10 @@ pub enum ParserError {
     /// The size of the character index section doesnt divide evenly by the size of the individual elements
     #[error("Invalid data in the character index")]
     InvalidCharacterIndex,
+
+    /// The codepoint for a glyph entry was not a valid Unicode scalar value
+    #[error("Invalid unicode codepoint encountered: {0}")]
+    InvalidCodepoint(u32),
 }
 
 /// Convertion from [`Parser`] into [`Font`] failed