Skip to content

Commit

Permalink
preliminary byte parser support, almost fully reformatted with rustfmt
Browse files Browse the repository at this point in the history
  • Loading branch information
rrevenantt committed Jul 7, 2020
1 parent 6bb617b commit d765c85
Show file tree
Hide file tree
Showing 48 changed files with 2,015 additions and 1,580 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ keywords = ["ANTLR","ANTLR4","parsing","runtime"]
categories = ["parsing"]

[dependencies]
lazy_static = "1.4.*"
lazy_static = "^1.4.*"
uuid = "=0.6.*"
byteorder = "1"
byteorder = "^1"
murmur3 = "=0.4"
bit-set = "=0.5.*"
once_cell = "1.2.*"
once_cell = "^1.2.*"
backtrace = "=0.3"
typed-arena = "2.0.*"
typed-arena = "^2.0.*"
git2 = "0.12.*"

[lib]
Expand Down
22 changes: 9 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,13 @@ For now development is going on in this repository
but eventually it will be merged to main ANTLR4 repo

Currently requires nightly version of rust.
This likely will be the case until `unsize` or some kind of `CoerceUnsized` is stabilized.
This likely will be the case until `coerce_unsize` or some kind of coercion trait is stabilized.
There are other unstable features in use but only `CoerceUnsized` is essential.

Remaining things before merge:
- API stabilization
- [ ] Rust api guidelines compliance
- [ ] more tests for API because it is quite different from Java
- generate enum for labeled alternatives without redundant `Error` option
- option to generate fields instead of getters by default
- move useful exports to lib.rs for better documentation
- reexport statics crate and move to once_cell
- support byte level parser

Can be done after merge:
- profiling and performance optimizations
Expand All @@ -40,9 +35,10 @@ Can be done after merge:
- [ ] Not all warnings are fixed
- cfg to not build potentially unnecessary parts
(no Lexer if custom token stream, no ParserATNSimulator if LL(1) grammar)
- visitor
- run rustfmt on generated parser
###### Long term improvements
- generate enum for labeled alternatives without redundant `Error` option
- option to generate fields instead of getters by default
- make tree generic over pointer type
(requires GAT, otherwise it would be a problem for users that want ownership for parse tree)
- support stable rust
Expand All @@ -65,10 +61,9 @@ Then add following to `Cargo.toml` of the crate from which generated parser
is going to be used:
```toml
[dependencies]
lazy_static = "1.4"
antlr-rust = "0.1"
antlr-rust = "=0.2"
```
and `#![feature(try_blocks)]` in your project root module.
and `#![feature(try_blocks)]`(also `#![feature(specialization)]` if you are generating visitor) in your project root module.

### Parse Tree structure

Expand All @@ -89,16 +84,17 @@ It also is possible to disable generic parse tree creation to keep only selected
`parser.build_parse_trees = false`.

### Differences with Java
Although Rust runtime API is made as close as possible to Java,
Although Rust runtime API has been made as close as possible to Java,
there are quite some differences because Rust is not an OOP language and is much more explicit.

- All rule context variables (rule argument or rule return) should implement `Default + Clone`.
- If you are using labeled alternatives,
struct generated for rule is an enum with a variant for each alternative
- Parser needs to have ownership for listeners, but it is possible te get listener back via `ListenerId`
- Parser needs to have ownership for listeners, but it is possible to get listener back via `ListenerId`
otherwise `ParseTreeWalker` should be used.
- In embedded actions to access parser you should use `recog` variable instead of `self`.
- In embedded actions to access parser you should use `recog` variable instead of `self`/`this`.
This is because predicates have to be inserted into two syntactically different places in the generated parser
- In actions you have to escape `'` in rust lifetimes with `\ ` because ANTLR considers them as strings: `Struct<\'lifetime>`
- For custom tokens you should use `@tokenfactory` custom action, instead of usual `TokenLabelType` parser option


Expand Down
17 changes: 8 additions & 9 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,36 @@ use std::process::Command;

fn main() {
let grammars = vec!["CSV", "ReferenceToATN", "XMLLexer", "SimpleLR", "Labels"];
let additional_args = vec![Some("-visitor"), None, None, None, None, ];
let antlr_path = "/home/rrevenantt/dev/antlr4/tool/target/antlr4-4.8-2-SNAPSHOT-complete.jar";

for grammar in grammars {
for (grammar, arg) in grammars.into_iter().zip(additional_args) {
//ignoring error because we do not need to run anything when deploying to crates.io
let _ = gen_for_grammar(grammar, antlr_path);
let _ = gen_for_grammar(grammar, antlr_path, arg);
}


println!("cargo:rerun-if-changed=build.rs");

println!("cargo:rerun-if-changed=/home/rrevenantt/dev/antlr4/tool/target/antlr4-4.8-2-SNAPSHOT-complete.jar");
}

fn gen_for_grammar(grammar_file_name: &str, antlr_path: &str) -> Result<(), Box<dyn Error>> {
let out_dir = env::var("OUT_DIR").unwrap();
let dest_path = Path::new(&out_dir);
fn gen_for_grammar(grammar_file_name: &str, antlr_path: &str, additional_arg: Option<&str>) -> Result<(), Box<dyn Error>> {
// let out_dir = env::var("OUT_DIR").unwrap();
// let dest_path = Path::new(&out_dir);

let input = env::current_dir().unwrap().join("grammars");
let file_name = grammar_file_name.to_owned() + ".g4";

let x = Command::new("java")
let c = Command::new("java")
.current_dir(input)
// .arg("-jar")
.arg("-cp")
.arg(antlr_path)
.arg("org.antlr.v4.Tool")
.arg("-Dlanguage=Rust")
.arg("-no-visitor")
.arg("-o")
.arg("../tests/gen")
.arg(&file_name)
.args(additional_arg)
.spawn()
.expect("antlr tool failed to start")
.wait_with_output()?;
Expand Down
5 changes: 5 additions & 0 deletions grammars/ReferenceToATN.g4
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
grammar ReferenceToATN;

@tokenfactory{
pub type LocalTokenFactory<\'input> = antlr_rust::token_factory::OwningTokenFactory;
}
a : (ID|ATN)* ATN? {println!("{}",$text);};
ID : 'a'..'z'+ ;
ATN : '0'..'9'+;
Expand Down
3 changes: 2 additions & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
edition = "2018"
edition = "2018"
fn_single_line = true
29 changes: 16 additions & 13 deletions src/atn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,19 @@ impl ATN {
pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet {
s.get_next_tokens_within_rule().get_or_init(|| {
self.next_tokens_in_ctx::<EmptyContextType<CommonTokenFactory>>(s, None)
.modify_with(|r| {
r.read_only = true
}
)
.modify_with(|r| r.read_only = true)
})
}

/// Compute the set of valid tokens that can occur starting in state `s`.
/// If `ctx` is null, the set of tokens will not include what can follow
/// the rule surrounding `s`. In other words, the set will be
/// restricted to tokens reachable staying within `s`'s rule.
pub fn next_tokens_in_ctx<'a, Ctx: ParserNodeType<'a>>(&self, s: &dyn ATNState, _ctx: Option<&Ctx::Type>) -> IntervalSet {
pub fn next_tokens_in_ctx<'a, Ctx: ParserNodeType<'a>>(
&self,
s: &dyn ATNState,
_ctx: Option<&Ctx::Type>,
) -> IntervalSet {
let analyzer = LL1Analyzer::new(self);
analyzer.look::<'a, Ctx>(s, None, _ctx)
}
Expand All @@ -82,13 +83,9 @@ impl ATN {
self.states.push(state)
}

fn remove_state(&self, _state: ATNStateRef) {
unimplemented!()
}
fn remove_state(&self, _state: ATNStateRef) { unimplemented!() }

fn define_decision_state(&self, _s: ATNStateRef) -> isize {
unimplemented!()
}
fn define_decision_state(&self, _s: ATNStateRef) -> isize { unimplemented!() }

pub fn get_decision_state(&self, decision: usize) -> ATNStateRef {
self.decision_to_state[decision]
Expand Down Expand Up @@ -126,7 +123,11 @@ impl ATN {
/// specified state in the specified context.
/// @throws IllegalArgumentException if the ATN does not contain a state with
/// number {@code stateNumber}
pub fn get_expected_tokens<'a, Ctx: ParserNodeType<'a>>(&self, state_number: isize, _ctx: &Rc<Ctx::Type>) -> IntervalSet {
pub fn get_expected_tokens<'a, Ctx: ParserNodeType<'a>>(
&self,
state_number: isize,
_ctx: &Rc<Ctx::Type>,
) -> IntervalSet {
let s = self.states[state_number as usize].as_ref();
let mut following = self.next_tokens(s);
if !following.contains(TOKEN_EPSILON) {
Expand All @@ -138,7 +139,9 @@ impl ATN {
let mut ctx = Some(Rc::clone(_ctx));

while let Some(c) = ctx {
if c.get_invoking_state() < 0 || !following.contains(TOKEN_EPSILON) { break }
if c.get_invoking_state() < 0 || !following.contains(TOKEN_EPSILON) {
break;
}

let invoking_state = self.states[c.get_invoking_state() as usize].as_ref();
let tr = invoking_state.get_transitions().first().unwrap().as_ref();
Expand Down
95 changes: 56 additions & 39 deletions src/atn_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ pub struct ATNConfig {
//todo maybe option is unnecessary and PredictionContext::EMPTY would be enough
//another todo check arena alloc
context: Option<Arc<PredictionContext>>,
//todo looks like here option is also unnesesary
pub semantic_context: Box<SemanticContext>,
pub reaches_into_outer_context: isize,
pub(crate) config_type: ATNConfigType,
Expand Down Expand Up @@ -50,8 +49,16 @@ impl Hash for ATNConfig {
Some(c) => c.hash(state),
}
self.semantic_context.hash(state);
if let LexerATNConfig { lexer_action_executor, passed_through_non_greedy_decision } = &self.config_type {
state.write_i32(if *passed_through_non_greedy_decision { 1 } else { 0 });
if let LexerATNConfig {
lexer_action_executor,
passed_through_non_greedy_decision,
} = &self.config_type
{
state.write_i32(if *passed_through_non_greedy_decision {
1
} else {
0
});
match lexer_action_executor {
None => state.write_i32(0),
Some(ex) => ex.hash(state),
Expand All @@ -62,7 +69,12 @@ impl Hash for ATNConfig {

impl Debug for ATNConfig {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
f.write_fmt(format_args!("({},{},[{}]", self.state, self.alt, self.context.as_deref().unwrap()))?;
f.write_fmt(format_args!(
"({},{},[{}]",
self.state,
self.alt,
self.context.as_deref().unwrap()
))?;
if self.reaches_into_outer_context > 0 {
f.write_fmt(format_args!(",up={}", self.reaches_into_outer_context))?;
}
Expand All @@ -84,7 +96,10 @@ impl ATNConfig {
pub(crate) fn get_lexer_executor(&self) -> Option<&LexerActionExecutor> {
match &self.config_type {
ATNConfigType::BaseATNConfig => None,
ATNConfigType::LexerATNConfig { lexer_action_executor, .. } => lexer_action_executor.as_deref(),
ATNConfigType::LexerATNConfig {
lexer_action_executor,
..
} => lexer_action_executor.as_deref(),
}
}

Expand Down Expand Up @@ -135,75 +150,77 @@ impl ATNConfig {
atnconfig
}

pub fn cloned_with_new_semantic(&self, target: &dyn ATNState, ctx: Box<SemanticContext>) -> ATNConfig {
pub fn cloned_with_new_semantic(
&self,
target: &dyn ATNState,
ctx: Box<SemanticContext>,
) -> ATNConfig {
let mut new = self.cloned(target);
new.semantic_context = ctx;
new
}

pub fn cloned(&self, target: &dyn ATNState) -> ATNConfig {
// println!("depth {}",PredictionContext::size(self.context.as_deref()));
// println!("depth {}",PredictionContext::size(self.context.as_deref()));
let mut new = self.clone();
new.state = target.get_state_number();
if let ATNConfigType::LexerATNConfig { passed_through_non_greedy_decision, .. } = &mut new.config_type {
if let ATNConfigType::LexerATNConfig {
passed_through_non_greedy_decision,
..
} = &mut new.config_type
{
*passed_through_non_greedy_decision = check_non_greedy_decision(self, target);
}
new
}

pub fn cloned_with_new_ctx(&self, target: &dyn ATNState, ctx: Option<Arc<PredictionContext>>) -> ATNConfig {
pub fn cloned_with_new_ctx(
&self,
target: &dyn ATNState,
ctx: Option<Arc<PredictionContext>>,
) -> ATNConfig {
let mut new = self.cloned(target);
new.context = ctx;

new
}

pub(crate) fn cloned_with_new_exec(&self, target: &dyn ATNState, exec: Option<LexerActionExecutor>) -> ATNConfig {
pub(crate) fn cloned_with_new_exec(
&self,
target: &dyn ATNState,
exec: Option<LexerActionExecutor>,
) -> ATNConfig {
let mut new = self.cloned(target);
if let ATNConfigType::LexerATNConfig {
lexer_action_executor, passed_through_non_greedy_decision: _
} = &mut new.config_type {
lexer_action_executor,
passed_through_non_greedy_decision: _,
} = &mut new.config_type
{
*lexer_action_executor = exec.map(Box::new);
// *passed_through_non_greedy_decision = check_non_greedy_decision(self, target);
// *passed_through_non_greedy_decision = check_non_greedy_decision(self, target);
}
new
}

pub fn get_state(&self) -> ATNStateRef {
self.state
}
pub fn get_state(&self) -> ATNStateRef { self.state }

pub fn get_alt(&self) -> isize {
self.alt
}
pub fn get_alt(&self) -> isize { self.alt }

pub(crate) fn get_type(&self) -> &ATNConfigType {
&self.config_type
}
pub(crate) fn get_type(&self) -> &ATNConfigType { &self.config_type }

pub fn get_context(&self) -> Option<&Arc<PredictionContext>> {
self.context.as_ref()
}
pub fn get_context(&self) -> Option<&Arc<PredictionContext>> { self.context.as_ref() }

pub fn take_context(&mut self) -> Arc<PredictionContext> {
self.context.take().unwrap()
}
pub fn take_context(&mut self) -> Arc<PredictionContext> { self.context.take().unwrap() }

pub fn set_context(&mut self, _v: Arc<PredictionContext>) {
self.context = Some(_v);
}
pub fn set_context(&mut self, _v: Arc<PredictionContext>) { self.context = Some(_v); }

pub fn get_reaches_into_outer_context(&self) -> isize {
self.reaches_into_outer_context
}
pub fn get_reaches_into_outer_context(&self) -> isize { self.reaches_into_outer_context }

pub fn set_reaches_into_outer_context(&mut self, _v: isize) {
self.reaches_into_outer_context = _v
}

pub fn is_precedence_filter_suppressed(&self) -> bool {
self.precedence_filter_suppressed
}
pub fn is_precedence_filter_suppressed(&self) -> bool { self.precedence_filter_suppressed }

pub fn set_precedence_filter_suppressed(&mut self, _v: bool) {
self.precedence_filter_suppressed = _v;
Expand All @@ -212,7 +229,8 @@ impl ATNConfig {

fn check_non_greedy_decision(source: &ATNConfig, target: &dyn ATNState) -> bool {
if let LexerATNConfig {
passed_through_non_greedy_decision: true, ..
passed_through_non_greedy_decision: true,
..
} = source.get_type()
{
return true;
Expand All @@ -225,4 +243,3 @@ fn check_non_greedy_decision(source: &ATNConfig, target: &dyn ATNState) -> bool
}
false
}

Loading

0 comments on commit d765c85

Please sign in to comment.