diff --git a/Cargo.lock b/Cargo.lock index 288d7a0..ccce071 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + [[package]] name = "memchr" version = "2.7.1" @@ -210,6 +216,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "log", "nom", "test-case", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 519f06c..9461af9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ edition = "2021" [dependencies] anyhow = "1.0.79" clap = { version = "4.4.18", features = ["derive"] } +log = "0.4.20" nom = "7.1.3" thiserror = "1.0.56" diff --git a/examples/pff2.rs b/examples/pff2.rs index 6c7d3a5..0aed5fb 100644 --- a/examples/pff2.rs +++ b/examples/pff2.rs @@ -41,15 +41,13 @@ fn render_glyphs(glyphs: &[Glyph], ascent: u16, descent: u16) { fn render_glyph(glyph: &Glyph) { if glyph.height == 0 || glyph.width == 0 { println!( - r" 0 {:8x} {:8} + r" 0 {:-8} {:-8} 0 | 0x0 ", glyph.code, - char::from_u32(glyph.code) - .map(|c| format!(r#""{}""#, c)) - .unwrap_or(r#""????""#.to_string()), + glyph.code.escape_unicode(), ); return; @@ -64,12 +62,10 @@ fn render_glyph(glyph: &Glyph) { const TRANSPARENT: Option = Some(Glyph::TRANSPARENT_PIXEL); println!( - " {} {:8x} {:8}", + " {} {:-8} | {:-8}", (0..glyph.width).fold(String::new(), |acc, i| format!("{acc}{}", i % 8)), + glyph.code.escape_unicode(), glyph.code, - char::from_u32(glyph.code) - .map(|c| format!(r#""{}""#, c)) - .unwrap_or(r#""????""#.to_string()), ); let mut bytes = glyph.bitmap.iter().enumerate(); @@ -124,9 +120,9 @@ fn render_glyph(glyph: &Glyph) { fn print_glyph(glyph: &Glyph, ascent: u16, descent: u16) { println!( - "{c:2} U+{u:04x} | {w:2}w {h:2}h | {dx:3}dx {dy:3}dy | {W:2}W", - c = char::from_u32(glyph.code).unwrap_or('?'), - u = glyph.code, + "{c:2} {u} | {w:2}w {h:2}h | {dx:3}dx {dy:3}dy | {W:2}W", + c = glyph.code, + u = glyph.code.escape_unicode(), w = glyph.width, h = glyph.height, dx = glyph.x_offset, diff --git a/src/lib.rs b/src/lib.rs index ab72e6f..9b5d2e3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,9 @@ use std::rc::Rc; #[macro_use] extern crate test_case; +#[macro_use] +extern crate log; + extern crate thiserror; pub mod parser { diff --git a/src/parser/pff2.rs b/src/parser/pff2.rs index a991af1..2ec584c 100644 --- a/src/parser/pff2.rs +++ b/src/parser/pff2.rs @@ -29,9 +29,6 @@ pub type Font = Pff2; /// on the requiements. pub type Parser = Pff2; -/// The internal representation of the UTF code point. -type Codepoint = u32; - /// The PFF2 font. /// /// Only contains relevant to GRUB metadata about the font as well as the glyph list. @@ -66,7 +63,7 @@ pub struct Pff2 { #[derive(Debug, Clone, PartialEq, Eq)] pub struct Glyph { /// The UTF codepoint of the character - pub code: Codepoint, + pub code: char, // TODO: document these params pub width: usize, @@ -84,11 +81,9 @@ impl Parser { /// Constructs [`Self`] from a PFF2 buffer. pub fn parse(input: &[u8]) -> Result { - let input_for_data_section = input; // Save this because data offsets are absolute + let input_for_data_section = input; // Save this because SectionName::CharIndex offsets are absolute - let (magic, mut input) = input.split_at(4 + 4 + 4); - // This is technically a section, but because its always first and same content - // we just compare it in one go. + let (magic, mut input) = input.split_at(Self::MAGIC.len()); if magic != Self::MAGIC { return Err(ParserError::BadMagicBytes); } @@ -102,6 +97,7 @@ impl Parser { let (section, length, input) = Self::parse_section_header(input)?; let Ok(section) = SectionName::try_from(section) else { + warn!("Skipping section {section:?} because it is not supported"); break 'input &input[length..]; }; @@ -109,15 +105,15 @@ impl Parser { match section { FontName => font.name = Self::parse_string(&input[..length])?, Family => font.family = Self::parse_string(&input[..length])?, - PointSize => font.point_size = Self::parse_u16(&input[..length])?, + PointSize => font.point_size = Self::parse_u16_be(&input[..length])?, Weight => font.weight = Self::parse_string(&input[..length])?, - MaxCharWidth => font.max_char_width = Self::parse_u16(&input[..length])?, - MaxCharHeight => font.max_char_height = Self::parse_u16(&input[..length])?, - Ascent => font.ascent = Self::parse_u16(&input[..length])?, - Descent => font.descent = Self::parse_u16(&input[..length])?, + MaxCharWidth => font.max_char_width = Self::parse_u16_be(&input[..length])?, + MaxCharHeight => font.max_char_height = Self::parse_u16_be(&input[..length])?, + Ascent => font.ascent = Self::parse_u16_be(&input[..length])?, + Descent => font.descent = Self::parse_u16_be(&input[..length])?, CharIndex => char_indexes = Self::parse_char_indexes(&input[..length])?, Data => { - font.glyphs = Self::parse_data_section(char_indexes, input_for_data_section)?; + font.glyphs = Self::parse_data_section(char_indexes, input_for_data_section); break 'parsing; } } @@ -133,15 +129,19 @@ impl Parser { Ok(font) } - /// Returns the section name, length as usize and the rest of the supplied input. + /// Returns the section name, length as usize and the rest of the supplied input starting at the beginning of + /// section content. fn parse_section_header(input: &[u8]) -> Result<([u8; 4], usize, &[u8]), ParserError> { let (section, input) = input.split_at(4); let section: [u8; 4] = section.try_into().map_err(|_| ParserError::InsufficientHeaderBytes)?; let (length, input) = input.split_at(4); - - let length = - u32::from_be_bytes(length.try_into().map_err(|_| ParserError::InsufficientHeaderBytes)?).to_usize(); + let length = u32::from_be_bytes( + length + .try_into() + .map_err(|_| ParserError::InsufficientLengthBytes { section })?, + ) + .to_usize(); Ok((section, length, input)) } @@ -160,8 +160,8 @@ impl Parser { String::from_utf8(input[..input.len()].to_vec()) } - /// Converts the entirety of input into a u16. If the supplied slice is not 2b long, returns an error. - fn parse_u16(input: &[u8]) -> Result { + /// Converts the entirety of input into a u16. If the supplied slice is not 2 bytes long, returns an error. + fn parse_u16_be(input: &[u8]) -> Result { if input.len() != 2 { return Err(ParserError::InvalidU16Length(input.len())); } @@ -169,74 +169,44 @@ impl Parser { Ok(u16::from_be_bytes([input[0], input[1]])) } - /// Validates [`Self`] to be a valid font that can be used for rendering. See [`FontValidationError`] for reasons a - /// font may be invalid. - pub fn validate(self) -> Result { - use FontValidationError::*; - - if self.name.is_empty() { - return Err(EmptyName); - } - - for (prop, err) in [ - (self.max_char_width, ZeroMaxCharWidth), - (self.max_char_height, ZeroMaxCharHeight), - (self.ascent, ZeroAscent), - (self.descent, ZeroDescent), - ] { - if prop == 0 { - return Err(err); - } - } - - if self.glyphs.len() == 0 { - return Err(NoGlyphs); - } - - Ok(Font { - name: self.name, - family: self.family, - point_size: self.point_size, - weight: self.weight, - max_char_width: self.max_char_width, - max_char_height: self.max_char_height, - ascent: self.ascent, - descent: self.descent, - leading: self.leading, - glyphs: self.glyphs, - _validation: PhantomData, - }) - } - /// Parses the `CHIX` section and returns the glyph lookup table. Errors out if the character index is not alligned - /// properly for reading (length doesnt divide perfectly by allignment) + /// properly for reading (length doesnt divide perfectly by size of an entry) fn parse_char_indexes(input: &[u8]) -> Result, ParserError> { - const ALLIGNMENT: usize = 4 + 1 + 4; + const ENTRY_SIZE: usize = 4 + 1 + 4; - if input.len() % ALLIGNMENT != 0 { + if input.len() % ENTRY_SIZE != 0 { return Err(ParserError::InvalidCharacterIndex); } - Ok(input - .chunks(ALLIGNMENT) - .map(|chunk| CharIndex { - code: u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]), - // skipp [4], it's a `storage_flags`, and GRUB never uses that field anyway - offset: u32::from_be_bytes([chunk[5], chunk[6], chunk[7], chunk[8]]).to_usize(), + input + .chunks(ENTRY_SIZE) + .map(|chunk| { + let codepoint = u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]); + + Ok::<_, ParserError>(CharIndex { + code: char::from_u32(codepoint).ok_or(ParserError::InvalidCodepoint(codepoint))?, + //code: u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]), + // skipp [4], it's a `storage_flags`, and GRUB never uses that field anyway + offset: u32::from_be_bytes([chunk[5], chunk[6], chunk[7], chunk[8]]).to_usize(), + }) }) - .collect()) + .collect() } /// Takes the glyph lookup section and combines it with the content of the data section to get the complete glyph - /// data. - fn parse_data_section(indexes: Vec, input: &[u8]) -> Result, ParserError> { - let mut glyphs = Vec::with_capacity(input.len()); + /// data. [`CharIndex`] offsets are file-global (absolute), so `input` should be the entirety of the file. + fn parse_data_section(indexes: Vec, input: &[u8]) -> Rc<[Glyph]> { + let mut glyphs = Vec::with_capacity(indexes.len()); for index in indexes { let offset = index.offset; - // make sure there are enough bytes to read glyph data + // make sure there are enough bytes to read the bitmap dimentions if offset + 4 > input.len() { + warn!( + "Insufficient data to load a glyph for codepoint {}", + index.code.escape_unicode(), + ); continue; } @@ -245,7 +215,12 @@ impl Parser { let bitmap_len = (width * height + 7) / 8; - if offset + 12 + bitmap_len > input.len() { + // make sure there are enough bytes to read the bitmap and the rest of the fields + if offset + 10 + bitmap_len > input.len() { + warn!( + "Insufficient data to load a glyph for codepoint {}", + index.code.escape_unicode() + ); continue; } @@ -260,13 +235,48 @@ impl Parser { }; glyphs.push(glyph); + } + + glyphs.into() + } + + /// Validates [`Self`] to be a valid font that can be used for rendering. See [`FontValidationError`] for reasons a + /// font may be invalid. + pub fn validate(self) -> Result { + use FontValidationError::*; + + if self.name.is_empty() { + return Err(EmptyName); + } - if index.code == 0x21 { - // dbg!([input[offset + 10], input[offset + 11]]); + for (prop, err) in [ + (self.max_char_width, ZeroMaxCharWidth), + (self.max_char_height, ZeroMaxCharHeight), + (self.ascent, ZeroAscent), + (self.descent, ZeroDescent), + ] { + if prop == 0 { + return Err(err); } } - Ok(Rc::from(glyphs.as_slice())) + if self.glyphs.len() == 0 { + return Err(NoGlyphs); + } + + Ok(Font { + name: self.name, + family: self.family, + point_size: self.point_size, + weight: self.weight, + max_char_width: self.max_char_width, + max_char_height: self.max_char_height, + ascent: self.ascent, + descent: self.descent, + leading: self.leading, + glyphs: self.glyphs, + _validation: PhantomData, + }) } } @@ -286,7 +296,9 @@ enum SectionName { /// An intermediate structure used for reading glyphs from a font file. This is discarded after the glyphs are read. struct CharIndex { - pub code: Codepoint, + /// The UCS-4 codepoint + pub code: char, + /// A file-level (absolute) offset to the glyph data pub offset: usize, } @@ -302,8 +314,8 @@ pub enum ParserError { InsufficientHeaderBytes, /// Expected to be able to read 4 bytes as a u32 length of the section - #[error("Insufficient section length bytes")] - InsufficientLengthBytes, + #[error("Insufficient bytes to read the length of section {section:?}")] + InsufficientLengthBytes { section: [u8; 4] }, /// String stored in a section had illegal UTF-8 bytes #[error("Invalid UTF-8 string: {0}")] @@ -316,6 +328,10 @@ pub enum ParserError { /// The size of the character index section doesnt divide evenly by the size of the individual elements #[error("Invalid data in the character index")] InvalidCharacterIndex, + + /// The codepoint for a glyph entry was not valid UTF-8 + #[error("Invalid unicode codepoint encountered: {0}")] + InvalidCodepoint(u32), } /// Convertion from [`Parser`] into [`Font`] failed diff --git a/src/render/pff2.rs b/src/render/pff2.rs index 78ea84f..d35a17d 100644 --- a/src/render/pff2.rs +++ b/src/render/pff2.rs @@ -13,16 +13,6 @@ impl Glyph { pub const TRANSPARENT_PIXEL: bool = false; pub fn pixel(&self, x: usize, y: usize) -> Option { - // dbg!(self.x_offset, self.y_offset); - - // let x = isize::try_from(x).ok()?; - // let x = x.checked_add(self.x_offset)?; - // let x = usize::try_from(x).ok()?; - - // let y = isize::try_from(y).ok()?; - // let y = y.checked_sub(self.y_offset)?; - // let y = usize::try_from(y).ok()?; - if !(x < self.width && y < self.height) { return None; } @@ -58,10 +48,6 @@ mod tests { ..Default::default() }; - for (i, x) in (0..width).enumerate() { - //println!("{i}: {:?}", glyph.pixel(x, 0)); - } - glyph.pixel(x, y).unwrap() as u8 } }