Skip to content

Commit

Permalink
modify some tests to truncate in the middle of a multibyte char
Browse files Browse the repository at this point in the history
  • Loading branch information
etseidl committed Dec 14, 2024
1 parent 400f5f8 commit 006a388
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions parquet/src/column/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3268,8 +3268,8 @@ mod tests {
assert_eq!(&r, "yyyyyyyz".as_bytes());

// 2-byte without overflow
let r = truncate_and_increment_utf8("ééééé", 8).unwrap();
assert_eq!(&r, "éééê".as_bytes());
let r = truncate_and_increment_utf8("ééééé", 7).unwrap();
assert_eq!(&r, "ééê".as_bytes());

// 2-byte that overflows lowest byte
let r = truncate_and_increment_utf8("\u{ff}\u{ff}\u{ff}\u{ff}\u{ff}", 8).unwrap();
Expand All @@ -3281,15 +3281,15 @@ mod tests {

// 3-byte without overflow: a run of U+800 truncated to 8 bytes keeps two whole characters
// and increments the last one, giving [U+800, U+801] (note that these characters should
// render right to left).
let r = truncate_and_increment_utf8("ࠀࠀࠀ", 8).unwrap();
let r = truncate_and_increment_utf8("ࠀࠀࠀࠀ", 8).unwrap();
assert_eq!(&r, "ࠀࠁ".as_bytes());

// max 3-byte should not truncate as it would need 4-byte code points
let r = truncate_and_increment_utf8("\u{ffff}\u{ffff}\u{ffff}", 8);
assert!(r.is_none());

// 4-byte without overflow
let r = truncate_and_increment_utf8("𐀀𐀀𐀀", 8).unwrap();
let r = truncate_and_increment_utf8("𐀀𐀀𐀀𐀀", 9).unwrap();
assert_eq!(&r, "𐀀𐀁".as_bytes());

// max 4-byte should not truncate
Expand Down

0 comments on commit 006a388

Please sign in to comment.