Skip to content

Commit

Permalink
wip utf8 tokens
Browse files (browse the repository at this point in the history)
  • Loading branch information
misson20000 committed Oct 26, 2024
1 parent 12424d0 commit 4c0b27f
Show file tree
Hide file tree
Showing 14 changed files with 304 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/model/datapath.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ impl DataPathExt for DataPath {
}

impl<'a> ByteRecordRange<'a> {
pub fn new(addr: u64, records: &'a mut [ByteRecord]) -> ByteRecordRange {
pub fn new(addr: u64, records: &'a mut [ByteRecord]) -> Self {
ByteRecordRange {
addr,
out: records,
Expand Down
6 changes: 5 additions & 1 deletion src/model/document/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ pub enum ContentDisplay {
line_pitch: addr::Size,
gutter_pitch: addr::Size,
},
Hexstring
Hexstring,
Utf8 {
max_line_length: addr::Size,
},
}

pub type Path = vec::Vec<usize>;
Expand Down Expand Up @@ -183,6 +186,7 @@ impl ContentDisplay {
ContentDisplay::None => None,
ContentDisplay::Hexdump { line_pitch, .. } => Some(*line_pitch),
ContentDisplay::Hexstring => None,
ContentDisplay::Utf8 { max_line_length, .. } => Some(*max_line_length),
}
}

Expand Down
62 changes: 50 additions & 12 deletions src/model/listing/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ pub enum LineType {
title: Option<token::TitleToken>,
token: token::HexstringToken,
},
Utf8 {
title: Option<token::TitleToken>,
token: token::Utf8Token,
},
Summary {
title: Option<token::TitleToken>,
tokens: collections::VecDeque<token::Token>
Expand Down Expand Up @@ -186,6 +190,7 @@ impl Line {
tokens: collections::VecDeque::from([token])
},
token::Token::Hexstring(token) => LineType::Hexstring { title: None, token },
token::Token::Utf8(token) => LineType::Utf8 { title: None, token },
},
}
}
Expand Down Expand Up @@ -278,6 +283,21 @@ impl Line {
token: hexstring_token,
}, LinePushResult::Accepted),

/* A utf8 token can end a line */
(LineType::Empty, token::Token::Utf8(token)) => (LineType::Utf8 {
title: None,
token
}, LinePushResult::Accepted),

/* A title token can occur on the same line as a utf8 if the title is inline and there isn't already a title. */
(LineType::Utf8 { title: None, token: utf8_token }, token::Token::Title(token))
if sync::Arc::ptr_eq(&token.common.node, &utf8_token.common.node)
&& token.common.node.props.title_display.is_inline()
=> (LineType::Utf8 {
title: Some(token),
token: utf8_token,
}, LinePushResult::Accepted),

/* Summaries... */
(LineType::Empty, token::Token::SummaryEpilogue(token)) => (LineType::Summary {
title: None,
Expand Down Expand Up @@ -393,6 +413,21 @@ impl Line {
title: Some(title_token),
token,
}, LinePushResult::Accepted),

/* A utf8 token can begin a line */
(LineType::Empty, token::Token::Utf8(token)) => (LineType::Utf8 {
title: None,
token
}, LinePushResult::Completed),

/* A utf8 token can occur on the same line as a title if the title is inline. */
(LineType::Title(title_token), token::Token::Utf8(token))
if sync::Arc::ptr_eq(title_token.node(), token.node())
&& title_token.node().props.title_display.is_inline()
=> (LineType::Utf8 {
title: Some(title_token),
token,
}, LinePushResult::Accepted),

/* Summaries... */
(LineType::Empty, token::Token::SummaryPreamble(token)) => (LineType::Summary {
Expand Down Expand Up @@ -445,12 +480,13 @@ impl Line {
let token_mapper: for<'b> fn(&'b token::Token) -> token::TokenRef<'b> = TokenKind::as_ref;

match &self.ty {
LineType::Empty => util::PhiIteratorOf5::I1(iter::empty()),
LineType::Blank(t) => util::PhiIteratorOf5::I2(iter::once(t.as_ref())),
LineType::Title(t) => util::PhiIteratorOf5::I2(iter::once(t.as_ref())),
LineType::Hexdump { title, tokens, .. } => util::PhiIteratorOf5::I3(title.as_ref().map(TokenKind::as_ref).into_iter().chain(tokens.iter().map(hexdump_mapper))),
LineType::Hexstring { title, token, .. } => util::PhiIteratorOf5::I4(title.as_ref().map(TokenKind::as_ref).into_iter().chain(iter::once(token.as_ref()))),
LineType::Summary { title, tokens, .. } => util::PhiIteratorOf5::I5(title.as_ref().map(TokenKind::as_ref).into_iter().chain(tokens.iter().map(token_mapper))),
LineType::Empty => util::PhiIteratorOf6::I1(iter::empty()),
LineType::Blank(t) => util::PhiIteratorOf6::I2(iter::once(t.as_ref())),
LineType::Title(t) => util::PhiIteratorOf6::I2(iter::once(t.as_ref())),
LineType::Hexdump { title, tokens, .. } => util::PhiIteratorOf6::I3(title.as_ref().map(TokenKind::as_ref).into_iter().chain(tokens.iter().map(hexdump_mapper))),
LineType::Hexstring { title, token, .. } => util::PhiIteratorOf6::I4(title.as_ref().map(TokenKind::as_ref).into_iter().chain(iter::once(token.as_ref()))),
LineType::Utf8 { title, token, .. } => util::PhiIteratorOf6::I5(title.as_ref().map(TokenKind::as_ref).into_iter().chain(iter::once(token.as_ref()))),
LineType::Summary { title, tokens, .. } => util::PhiIteratorOf6::I6(title.as_ref().map(TokenKind::as_ref).into_iter().chain(tokens.iter().map(token_mapper))),
}
}

Expand All @@ -459,12 +495,13 @@ impl Line {
let hexdump_mapper: fn(token::HexdumpToken) -> token::Token = TokenKind::into_token;

match self.ty {
LineType::Empty => util::PhiIteratorOf5::I1(iter::empty()),
LineType::Blank(t) => util::PhiIteratorOf5::I2(iter::once(t.into_token())),
LineType::Title(t) => util::PhiIteratorOf5::I2(iter::once(t.into_token())),
LineType::Hexdump { title, tokens, .. } => util::PhiIteratorOf5::I3(title.map(TokenKind::into_token).into_iter().chain(tokens.into_iter().map(hexdump_mapper))),
LineType::Hexstring { title, token, .. } => util::PhiIteratorOf5::I4(title.map(TokenKind::into_token).into_iter().chain(iter::once(token.into_token()))),
LineType::Summary { title, tokens, .. } => util::PhiIteratorOf5::I5(title.map(TokenKind::into_token).into_iter().chain(tokens.into_iter())),
LineType::Empty => util::PhiIteratorOf6::I1(iter::empty()),
LineType::Blank(t) => util::PhiIteratorOf6::I2(iter::once(t.into_token())),
LineType::Title(t) => util::PhiIteratorOf6::I2(iter::once(t.into_token())),
LineType::Hexdump { title, tokens, .. } => util::PhiIteratorOf6::I3(title.map(TokenKind::into_token).into_iter().chain(tokens.into_iter().map(hexdump_mapper))),
LineType::Hexstring { title, token, .. } => util::PhiIteratorOf6::I4(title.map(TokenKind::into_token).into_iter().chain(iter::once(token.into_token()))),
LineType::Utf8 { title, token, .. } => util::PhiIteratorOf6::I5(title.map(TokenKind::into_token).into_iter().chain(iter::once(token.into_token()))),
LineType::Summary { title, tokens, .. } => util::PhiIteratorOf6::I6(title.map(TokenKind::into_token).into_iter().chain(tokens.into_iter())),
}
}
}
Expand Down Expand Up @@ -535,6 +572,7 @@ impl fmt::Debug for Line {
LineType::Title(_) => &"title",
LineType::Hexdump { .. } => &"hexdump",
LineType::Hexstring { .. } => &"hexstring",
LineType::Utf8 { .. } => &"utf8",
LineType::Summary { .. } => &"summary",
})
.field("tokens", &self.iter_tokens().map(|tok| token::TokenTestFormat(tok)).collect::<Vec<_>>())
Expand Down
57 changes: 56 additions & 1 deletion src/model/listing/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ enum PositionState {
index: usize
},
Hexstring(addr::Extent, usize),
Utf8 {
extent: addr::Extent,
index: usize
},

SummaryPreamble,
SummaryOpener,
Expand Down Expand Up @@ -352,6 +356,7 @@ impl Position {
PositionState::MetaContent(offset, index) => IntermediatePortState::NormalContent(Some(destructured_childhood.offset + offset.to_size()), destructured_child_index + *index),
PositionState::Hexdump { extent, index, .. } => IntermediatePortState::NormalContent(Some(destructured_childhood.offset + extent.begin.to_size()), destructured_child_index + *index),
PositionState::Hexstring(extent, index) => IntermediatePortState::NormalContent(Some(destructured_childhood.offset + extent.begin.to_size()), destructured_child_index + *index),
PositionState::Utf8 { extent, index } => IntermediatePortState::NormalContent(Some(destructured_childhood.offset + extent.begin.to_size()), destructured_child_index + *index),
PositionState::SummaryLeaf => IntermediatePortState::NormalContent(Some(destructured_childhood.offset), *destructured_child_index),

PositionState::SummaryLabel(i)
Expand Down Expand Up @@ -379,6 +384,7 @@ impl Position {
PositionState::MetaContent(offset, index) => IntermediatePortState::NormalContent(Some(*offset), *index),
PositionState::Hexdump { extent, index, .. } => IntermediatePortState::NormalContent(Some(extent.begin), *index),
PositionState::Hexstring(extent, index) => IntermediatePortState::NormalContent(Some(extent.begin), *index),
PositionState::Utf8 { extent, index } => IntermediatePortState::NormalContent(Some(extent.begin), *index),

PositionState::SummaryPreamble => IntermediatePortState::Finished(PositionState::Title),
PositionState::SummaryOpener => IntermediatePortState::NormalContent(Some(addr::unit::NULL), 0),
Expand All @@ -404,6 +410,7 @@ impl Position {
PositionState::MetaContent(_, index) => IntermediatePortState::SummaryLabel(*index),
PositionState::Hexdump { index, .. } => IntermediatePortState::SummaryLabel(*index),
PositionState::Hexstring(_, index) => IntermediatePortState::SummaryLabel(*index),
PositionState::Utf8 { index, .. } => IntermediatePortState::SummaryLabel(*index),

PositionState::SummaryPreamble => IntermediatePortState::Finished(PositionState::SummaryPreamble),
PositionState::SummaryOpener => IntermediatePortState::Finished(PositionState::SummaryOpener),
Expand Down Expand Up @@ -707,6 +714,11 @@ impl Position {
line: line_extent,
}.into_token()),
PositionState::Hexstring(extent, _) => TokenGenerationResult::Ok(token::HexstringToken::new_maybe_truncate(common.adjust_depth(1), extent).into_token()),
PositionState::Utf8 { extent, .. } => TokenGenerationResult::Ok(token::Utf8Token {
common: common.adjust_depth(1),
extent,
truncated: false,
}.into_token()),

PositionState::SummaryPreamble => TokenGenerationResult::Ok(token::SummaryPreambleToken {
common,
Expand Down Expand Up @@ -761,6 +773,7 @@ impl Position {
// Disallow hexdumps in summaries. This is a little nasty. Review later.
structure::ContentDisplay::Hexdump { .. } => token::HexstringToken::new_maybe_truncate(common, extent).into_token(),
structure::ContentDisplay::Hexstring => token::HexstringToken::new_maybe_truncate(common, extent).into_token(),
structure::ContentDisplay::Utf8 { max_line_length, .. } => token::Utf8Token::new_maybe_truncate(common, extent, max_line_length).into_token(),
})
},
PositionState::SummaryValueEnd => TokenGenerationResult::Skip,
Expand Down Expand Up @@ -883,8 +896,16 @@ impl Position {
line_extent,
index
}
}
},
structure::ContentDisplay::Hexstring => PositionState::Hexstring(interstitial, index),
structure::ContentDisplay::Utf8 { max_line_length, .. } => {
let line_begin = (max_line_length * ((std::cmp::max(interstitial.begin, offset - addr::unit::BIT) - interstitial.begin) / max_line_length)).to_addr();

PositionState::Utf8 {
extent: addr::Extent::between(line_begin, offset),
index
}
},
};

return true;
Expand All @@ -902,6 +923,10 @@ impl Position {
self.state = PositionState::MetaContent(extent.begin, index);
true
},
PositionState::Utf8 { extent, index } => {
self.state = PositionState::MetaContent(extent.begin, index);
true
},

PositionState::SummaryOpener => {
self.try_ascend(AscendDirection::Prev)
Expand Down Expand Up @@ -1035,6 +1060,14 @@ impl Position {
}
},
structure::ContentDisplay::Hexstring => PositionState::Hexstring(interstitial, index),
structure::ContentDisplay::Utf8 { max_line_length, .. } => {
let line_end = std::cmp::min(interstitial.end, offset + max_line_length);

PositionState::Utf8 {
extent: addr::Extent::between(offset, line_end),
index
}
},
};

return true;
Expand All @@ -1052,6 +1085,10 @@ impl Position {
self.state = PositionState::MetaContent(extent.end, index);
true
},
PositionState::Utf8 { extent, index } => {
self.state = PositionState::MetaContent(extent.end, index);
true
},

PositionState::SummaryOpener => {
if self.node.children.is_empty() {
Expand Down Expand Up @@ -1262,6 +1299,7 @@ impl Position {
PositionState::MetaContent(offset, _) => offset,
PositionState::Hexdump { extent, .. } => extent.begin,
PositionState::Hexstring(extent, _) => extent.begin,
PositionState::Utf8 { extent, .. } => extent.begin,
PositionState::SummaryPreamble => addr::unit::NULL,
PositionState::SummaryOpener => addr::unit::NULL,
PositionState::SummaryLabel(i) => self.node.children[i].offset,
Expand All @@ -1284,6 +1322,7 @@ impl Position {
PositionState::MetaContent(_, _) => false,
PositionState::Hexdump { .. } => false,
PositionState::Hexstring(_, _) => false,
PositionState::Utf8 { .. } => false,

PositionState::SummaryPreamble => true,
PositionState::SummaryOpener => true,
Expand Down Expand Up @@ -1610,6 +1649,8 @@ mod cmp {
super::PositionState::Hexdump { index, .. } => index.cmp(child_index),
super::PositionState::Hexstring(_, i) if i == child_index => std::cmp::Ordering::Less,
super::PositionState::Hexstring(_, i) => i.cmp(child_index),
super::PositionState::Utf8 { index, .. } if index == child_index => std::cmp::Ordering::Less,
super::PositionState::Utf8 { index, .. } => index.cmp(child_index),
super::PositionState::SummaryPreamble => std::cmp::Ordering::Less,
super::PositionState::SummaryOpener => std::cmp::Ordering::Less,
super::PositionState::SummaryLabel(i) if i == child_index => std::cmp::Ordering::Less,
Expand Down Expand Up @@ -1716,6 +1757,7 @@ mod cmp {
super::PositionState::MetaContent(addr, index) => (StateGroup::NormalContent, 0, *index, *addr, 0),
super::PositionState::Hexdump { extent, line_extent: _, index } => (StateGroup::NormalContent, 0, *index, extent.begin, 1),
super::PositionState::Hexstring(extent, index) => (StateGroup::NormalContent, 0, *index, extent.begin, 1),
super::PositionState::Utf8 { extent, index } => (StateGroup::NormalContent, 0, *index, extent.begin, 1),
super::PositionState::SummaryPreamble => (StateGroup::SummaryContent, 0, 0, addr::unit::NULL, 0),
super::PositionState::SummaryOpener => (StateGroup::SummaryContent, 1, 0, addr::unit::NULL, 0),
super::PositionState::SummaryLabel(x) => (StateGroup::SummaryContent, 2, 2*x, addr::unit::NULL, 0),
Expand Down Expand Up @@ -1897,6 +1939,14 @@ pub mod xml {
|e| panic!("expected valid pitch, got '{}' ({:?})", p, e),
|a| a.to_size())),
},
Some("utf8") => structure::ContentDisplay::Utf8 {
max_line_length: xml.attribute("max_line_length")
.map_or(
16.into(),
|p| addr::Address::parse(p).map_or_else(
|e| panic!("expected valid pitch, got '{}' ({:?})", p, e),
|a| a.to_size())),
},
Some("none") => structure::ContentDisplay::None,
Some(invalid) => panic!("invalid content attribute: {}", invalid)
},
Expand Down Expand Up @@ -1940,6 +1990,11 @@ pub mod xml {
line: inflate_line_extent(&self.node)
}.into_token(),
"hexstring" => token::HexstringToken::new_maybe_truncate(common, inflate_extent(&self.node)).into_token(),
"utf8" => token::Utf8Token {
common,
extent: inflate_extent(&self.node),
truncated: false,
}.into_token(),
tn => panic!("invalid token def: '{}'", tn)
}
}
Expand Down
12 changes: 9 additions & 3 deletions src/model/listing/stream_tests/formatting.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
</node>
<node name="inlinedump" offset="0x20" size="0x20" title="inline" content="hexdump" pitch="8">
</node>
<node name="utf8" offset="0x30" size="0x20" title="inline" content="utf8" max_line_length="0x14">
</node>
</node>
<tokens>
<null node="root" />
Expand All @@ -22,9 +24,13 @@
<hexdump node="inlinedump" extent="10:18" line="10:+8" index="0" />
<hexdump node="inlinedump" extent="18:20" line="18:+8" index="0" />
</indent>
<hexdump node="root" extent="40:50" line="40:+0x10" index="2" />
<hexdump node="root" extent="50:60" line="50:+0x10" index="2" />
<hexdump node="root" extent="60:70" line="60:+0x10" index="2" />
<title node="utf8" />
<indent>
<utf8 node="utf8" extent="00:14" index="0" />
<utf8 node="utf8" extent="14:20" index="0" />
</indent>
<hexdump node="root" extent="50:60" line="50:+0x10" index="3" />
<hexdump node="root" extent="60:70" line="60:+0x10" index="3" />
<null node="root" cursor="true" />
</indent>
</tokens>
Expand Down
Loading

0 comments on commit 4c0b27f

Please sign in to comment.