Skip to content

Commit

Permalink
support downcasting in parser code, fixed some warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
rrevenantt committed Oct 21, 2020
1 parent b054ca8 commit be1ccd3
Show file tree
Hide file tree
Showing 35 changed files with 587 additions and 529 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ byteorder = "^1"
murmur3 = "=0.4"
bit-set = "=0.5.*"
once_cell = "^1.2.*"
backtrace = "=0.3"
#backtrace = "=0.3"
typed-arena = "^2.0.*"
better_any = "=0.1"

[lib]

Expand Down
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# antlr4rust
[![docs](https://docs.rs/antlr-rust/badge.svg)](https://docs.rs/antlr-rust)
[![Crate](https://img.shields.io/crates/v/antlr_rust.svg)](https://crates.io/crates/antlr_rust)

ANTLR4 runtime for the Rust programming language

The tool (generator) part is currently located in the rust-target branch of my ANTLR4 fork: [rrevenantt/antlr4/tree/rust-target](https://github.com/rrevenantt/antlr4/tree/rust-target)
Expand Down Expand Up @@ -81,19 +84,25 @@ I.e. for `MultContext` struct will contain `a` and `b` fields containing child s
`op` field with `TerminalNode` type which corresponds to individual `Token`.
It is also possible to disable generic parse tree creation and keep only the selected children via
`parser.build_parse_trees = false`.

### Key properties
- Supports full zero-copy parsing including byte parsers
(you should be able to write zero-copy serde deserializers).
- Supports downcasting in places where the type is not known statically (trait objects and embedded actions)
- Listeners and visitors

### Differences with Java
Although Rust runtime API has been made as close as possible to Java,
there are quite a few differences, because Rust is not an OOP language and is much more explicit.

- Supports full zero-copy parsing including byte parsers.
- If you are using labeled alternatives,
struct generated for rule is an enum with variant for each alternative
the struct generated for the rule is an enum with a variant for each alternative
- The parser needs to take ownership of its listeners, but it is possible to get a listener back via `ListenerId`;
otherwise `ParseTreeWalker` should be used.
- In embedded actions to access parser you should use `recog` variable instead of `self`/`this`.
This is because predicate have to be inserted into two syntactically different places in generated parser
- String `InputStream` have different index behavior when there are unicode characters.
This is because predicates have to be inserted into two syntactically different places in the generated parser,
and in one of them it is impossible to refer to the parser as `self`.
- A str-based `InputStream` has different index behavior when there are Unicode characters.
If you need exactly the same behavior, use `[u32]` based `InputStream`, or implement custom `CharStream`.
- In actions you have to escape `'` in Rust lifetimes with `\ ` because ANTLR treats them as string delimiters, e.g. `Struct<\'lifetime>`
- To make custom tokens you should use `@tokenfactory` custom action, instead of usual `TokenLabelType` parser option.
Expand Down
8 changes: 3 additions & 5 deletions src/atn.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::collections::HashMap;
use std::rc::Rc;

use crate::atn_state::ATNState;
use crate::atn_state::ATNStateRef;
Expand All @@ -9,10 +8,9 @@ use crate::interval_set::IntervalSet;
use crate::lexer_action::LexerAction;
use crate::ll1_analyzer::LL1Analyzer;
use crate::parser::ParserNodeType;
use crate::parser_rule_context::ParserRuleContext;
use crate::rule_context::{EmptyContextType, RuleContext};
use crate::rule_context::EmptyContextType;
use crate::token::{TOKEN_EOF, TOKEN_EPSILON};
use crate::token_factory::{CommonTokenFactory, TokenFactory};
use crate::token_factory::CommonTokenFactory;
use crate::transition::RuleTransition;

pub const INVALID_ALT: isize = 0;
Expand Down Expand Up @@ -60,7 +58,7 @@ impl ATN {
///rule.
pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet {
s.get_next_tokens_within_rule().get_or_init(|| {
self.next_tokens_in_ctx::<EmptyContextType<CommonTokenFactory>>(s, None)
self.next_tokens_in_ctx::<EmptyContextType<'_, CommonTokenFactory>>(s, None)
.modify_with(|r| r.read_only = true)
})
}
Expand Down
18 changes: 2 additions & 16 deletions src/atn_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use crate::int_stream::EOF;
use crate::interval_set::IntervalSet;
use crate::lexer_action::LexerAction::*;
use crate::lexer_action::*;
use crate::rule_context::CustomRuleContext;
use crate::transition::Transition;
use crate::transition::*;

Expand All @@ -43,20 +42,12 @@ const SERIALIZED_VERSION: isize = 3;

pub struct ATNDeserializer {
deserialization_options: ATNDeserializationOptions,
data: Vec<u8>,
pos: isize,
uuid: String,
// pd:PhantomData<*const T>
}

impl ATNDeserializer {
pub fn new(options: Option<ATNDeserializationOptions>) -> ATNDeserializer {
ATNDeserializer {
deserialization_options: options.unwrap_or(ATNDeserializationOptions::default()),
data: Vec::new(),
pos: 0,
uuid: String::new(),
// pd: PhantomData,
}
}

Expand All @@ -66,7 +57,7 @@ impl ATNDeserializer {
unimplemented!()
}

pub fn deserialize(&self, data: Chars) -> ATN {
pub fn deserialize(&self, data: Chars<'_>) -> ATN {
let mut data = data.clone().map(|ch| {
let mut ch = ch as isize;
// decode surrogates
Expand Down Expand Up @@ -160,7 +151,6 @@ impl ATNDeserializer {
if state_type == ATNSTATE_INVALID_STATE_NUMBER {
atn.add_state(self.state_factory(ATNSTATE_INVALID_TYPE, -1, i));
panic!("why invalid state serialized?");
continue;
}

let mut rule_index = data.next().unwrap();
Expand Down Expand Up @@ -438,11 +428,7 @@ impl ATNDeserializer {
let mut precedence_states = Vec::new();
for state in _atn.states.iter() {
if let ATNStateType::DecisionState {
state:
ATNDecisionState::StarLoopEntry {
loop_back_state,
is_precedence,
},
state: ATNDecisionState::StarLoopEntry { .. },
..
} = state.get_state_type()
{
Expand Down
1 change: 1 addition & 0 deletions src/char_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::utils::Sealed;
/// Provides underlying data for Tokens.
pub trait CharStream<Data>: IntStream {
/// Returns underlying data piece, either slice or owned copy.
/// Panics if provided indexes are invalid
fn get_text(&self, a: isize, b: isize) -> Data;
fn get_text_from_interval(&self, i: &Interval) -> Data { self.get_text(i.a, i.b) }
}
Expand Down
2 changes: 2 additions & 0 deletions src/common_token_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ use crate::token::{OwningToken, Token, TOKEN_DEFAULT_CHANNEL, TOKEN_INVALID_TYPE
use crate::token_factory::TokenFactory;
use crate::token_source::TokenSource;
use crate::token_stream::{TokenStream, UnbufferedTokenStream};
use better_any::{Tid, TidAble};

#[derive(Tid)]
pub struct CommonTokenStream<'input, T: TokenSource<'input>> {
base: UnbufferedTokenStream<'input, T>,
channel: isize,
Expand Down
Loading

0 comments on commit be1ccd3

Please sign in to comment.