Skip to content

Commit

Permalink
Fixed most of the bugs, full listener support, added tree walker supp…
Browse files Browse the repository at this point in the history
…ort. almost all remaining features implemented
  • Loading branch information
rrevenantt committed Jan 19, 2020
1 parent 5bf545a commit 67eb97b
Show file tree
Hide file tree
Showing 49 changed files with 2,263 additions and 1,111 deletions.
6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
name = "antlr-rust"
version = "0.1.0"
authors = ["Konstantin Anisimov <[email protected]>"]
description = "ANTLR4 runtime for Rust"
edition = "2018"
build = false
license = "BSD-3-Clause"
keywords = ["ANTLR","ANTLR4","parsing","runtime"]
categories = ["parsing"]

[dependencies]
lazy_static = "1.4.*"
Expand All @@ -12,6 +15,7 @@ byteorder = "1"
murmur3 = "0.4"
bit-set = "0.5.*"
once_cell = "1.2.*"
backtrace = "0.3"

[lib]

Expand Down
28 changes: 17 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,31 @@ and [tests/my_tests.rs](tests/my_test.rs) for usage

# Implementation status

WIP, most of the logic is working(almost all antls test suit tests related to parsing/lexing logic are passing) but you should still expect bugs/panics.
If you are not going to use yet unimplemented features you might find it useful already.
WIP, almost all tests are passing, but you should still expect bugs/panics.
You might find it useful already, but it is not ready for production yet.
Also, the API will very likely change.

Missing features:
- [ ] Lexer
- [ ] Couple corner cases from ANTLR4 test suit are still failing
Currently requires a nightly version of Rust.
This will very likely remain the case until specialization is stabilized.

Remaining things:
- [ ] Parser
- [ ] recovery/error reporting is partially working
- [ ] some internal optimizations
- [ ] labeled alternatives/childs
- [ ] return values
- [ ] Full testing with ANTLR4 test suit(currently about 95% parser logic tests are passing )
- [ ] retrieve child by index if children have labeled alternatives
- [ ] Generator
- [ ] Rebase to upstream
- [ ] CI
- [ ] Documentation
- [ ] API stabilization
- [ ] Rust api guidelines compliance

#

# Future improvements:
- make parsing zero-copy (i.e. use &str instead of String in tokens and &Token in tree nodes)
- use & instead of Rc for nodes in parser
- support no_std (although alloc would still be required)
- support stable rust

# Licence

MIT
BSD 3-clause
15 changes: 15 additions & 0 deletions grammars/Labels.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Combined grammar exercising rule arguments, return values, element labels
// (`q=`, `a=`, `b=`, `x=`, `op=`) and labeled alternatives (`# name`).
// The embedded actions are Rust code.
grammar Labels;
// `z` invokes `s`, passing 0 as its argument.
z : s[0] ;
// `s` takes an `isize` argument `v`, matches a single `e` (labeled `q`)
// and then prints the `v` value of that `e`.
s[isize v] : q=e {println!("{}",$e.v)};
// Expression rule returning an `isize` named `v`. Several alternatives start
// with `e` itself (direct left recursion). Every alternative carries a `#` label.
// NOTE(review): the `dec` alternative has no action, so it never assigns `$v`.
e returns [isize v]
: a=e op='*' b=e {$v = $a.v * $b.v;} # mult
| a=e '+' b=e {$v = $a.v + $b.v;} # add
| INT {$v = $INT.int;} # anInt
| '(' x=e ')' {$v = $x.v;} # parens
| x=e '++' {$v = $x.v+1;} # inc
| e '--' # dec
| ID {$v = 3;} # anID
;
// Lexer rules: one or more lowercase letters, one or more digits,
// and spaces/newlines, which are skipped.
ID : 'a'..'z'+ ;
INT : '0'..'9'+ ;
WS : (' '|'\n') -> skip ;
2 changes: 1 addition & 1 deletion grammars/XMLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ TEXT : ~[<&]+ ; // match any 16 bit char other than < and &
// ----------------- Everything INSIDE of a tag ---------------------
mode INSIDE;

CLOSE : '>' {lexer.pop_mode();} ;
CLOSE : '>' {recog.pop_mode();} ;
SPECIAL_CLOSE: '?>' -> popMode ; // close <?xml...?>
SLASH_CLOSE : '/>' -> popMode ;
SLASH : '/' ;
Expand Down
5 changes: 3 additions & 2 deletions src/atn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Once;

use backtrace::Backtrace;

use crate::atn_deserializer::cast;
use crate::atn_state::ATNState;
use crate::atn_state::ATNStateRef;
Expand Down Expand Up @@ -66,7 +68,7 @@ impl ATN {
// }

pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet {
s.get_next_token_within_rule().get_or_init(|| {
s.get_next_tokens_within_rule().get_or_init(|| {
self.next_tokens_in_ctx(s, None)
.modify_with(|r| {
// println!("expecting {:?}", r);
Expand Down Expand Up @@ -124,7 +126,6 @@ impl ATN {
if following.contains(TOKEN_EPSILON) {
expected.add_one(TOKEN_EOF);
}

expected
}
}
18 changes: 12 additions & 6 deletions src/atn_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ impl Eq for ATNConfig {}

impl PartialEq for ATNConfig {
fn eq(&self, other: &Self) -> bool {
self.get_state() == other.get_state() && self.get_alt() == other.get_alt()
self.get_state() == other.get_state()
&& self.get_alt() == other.get_alt()
&& self.get_context() == other.get_context()
&& self.get_type() == other.get_type()
&& self.semantic_context == other.semantic_context
&& self.precedence_filter_suppressed == other.precedence_filter_suppressed
// && semantic context
}
}
Expand All @@ -48,7 +51,7 @@ impl Hash for ATNConfig {
None => state.write_i32(0),
Some(c) => c.hash(state),
}
//todo semantic context
self.semantic_context.hash(state);
if let LexerATNConfig { lexer_action_executor, passed_through_non_greedy_decision } = &self.config_type {
state.write_i32(if *passed_through_non_greedy_decision { 1 } else { 0 });
match lexer_action_executor {
Expand All @@ -59,9 +62,11 @@ impl Hash for ATNConfig {
}
}

#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct ATNConfig<T: DerefMut<Target=PredictionContext> = Box<PredictionContext>> {
precedence_filter_suppressed: bool,
//todo since ATNState is immutable when we started working with ATNConfigs
// looks like it is possible to have usual reference here
state: ATNStateRef,
alt: isize,
//todo maybe option is unnecessary and PredictionContext::EMPTY would be enough
Expand All @@ -70,12 +75,13 @@ pub struct ATNConfig<T: DerefMut<Target=PredictionContext> = Box<PredictionConte
// or maybe transform it into local variant with Rc because prediction for particular symbol is done in one thread
// or PredictionContext might be behind Box<dyn DerefMut<Target=PredictionContext>> to choose Rc/Arc at runtime
context: Option<T>,
//todo looks like here option is also unnesesary
pub semantic_context: Option<Box<SemanticContext>>,
pub reaches_into_outer_context: isize,
pub config_type: ATNConfigType,
}

#[derive(Eq, PartialEq, Clone)]
#[derive(Eq, PartialEq, Clone, Debug)]
pub enum ATNConfigType {
BaseATNConfig,
LexerATNConfig {
Expand Down Expand Up @@ -114,7 +120,7 @@ impl ATNConfig {
alt,
context: context.map(Box::new),
// semantic_context: SemanticContext::empty(),
semantic_context: None,
semantic_context: Some(Box::new(SemanticContext::NONE)),
reaches_into_outer_context: 0,
config_type: ATNConfigType::BaseATNConfig,
}
Expand Down Expand Up @@ -286,7 +292,7 @@ impl ATNConfig {
self.reaches_into_outer_context = _v
}

pub fn get_precedence_filter_suppressed(&self) -> bool {
pub fn is_precedence_filter_suppressed(&self) -> bool {
self.precedence_filter_suppressed
}

Expand Down
85 changes: 66 additions & 19 deletions src/atn_config_set.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
use std::cell::Cell;
use std::cmp::max;
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Error, Formatter};
use std::hash::{Hash, Hasher};

use bit_set::BitSet;
use murmur3::murmur3_32::MurmurHasher;

use crate::atn_config::ATNConfig;
use crate::atn_simulator::{BaseATNSimulator, IATNSimulator};
use crate::dfa::ScopeExt;
use crate::parser_atn_simulator::MergeCache;
use crate::parser_rule_context::empty_ctx;
use crate::prediction_context::{MurmurHasherBuilder, PredictionContext};
use crate::semantic_context::SemanticContext;

Expand Down Expand Up @@ -51,15 +54,16 @@ use crate::semantic_context::SemanticContext;
// fn set_dips_into_outer_context(&self, v: bool);
//}

#[derive(Eq, PartialEq)]
//#[derive(Debug)]
pub struct ATNConfigSet {
cached_hash: u64,

config_lookup: HashMap<u64, usize>,

//todo remove box?
pub(crate) configs: Vec<Box<ATNConfig>>,

conflicting_alts: BitSet,
pub(crate) conflicting_alts: BitSet,

dips_into_outer_context: bool,

Expand All @@ -70,8 +74,29 @@ pub struct ATNConfigSet {
read_only: bool,

unique_alt: isize,

hasher: fn(&ATNConfig) -> u64,
}

/// Debug rendering of an `ATNConfigSet`.
///
/// Prints the stored configs together with the flags that describe the set.
/// `cached_hash`, the `config_lookup` index and the `hasher` function pointer
/// are intentionally omitted: they are bookkeeping for lookups rather than
/// part of the set's observable contents.
///
/// The previous body was `unimplemented!()`, which made every `{:?}` of a
/// config set panic (for example inside an `assert_eq!` failure message).
impl Debug for ATNConfigSet {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
        f.debug_struct("ATNConfigSet")
            .field("configs", &self.configs)
            .field("conflicting_alts", &self.conflicting_alts)
            .field("unique_alt", &self.unique_alt)
            .field("full_ctx", &self.full_ctx)
            .field("has_semantic_context", &self.has_semantic_context)
            .field("dips_into_outer_context", &self.dips_into_outer_context)
            .field("read_only", &self.read_only)
            .finish()
    }
}

/// Equality for config sets.
///
/// Two sets compare equal when their configs, full-context flag, unique alt,
/// conflicting alts, semantic-context flag and outer-context flag all match.
/// Fields are compared in that order; tuple equality stops at the first
/// mismatch, just like a chain of `&&`.
impl PartialEq for ATNConfigSet {
    fn eq(&self, other: &Self) -> bool {
        let mine = (
            &self.configs,
            &self.full_ctx,
            &self.unique_alt,
            &self.conflicting_alts,
            &self.has_semantic_context,
            &self.dips_into_outer_context,
        );
        let theirs = (
            &other.configs,
            &other.full_ctx,
            &other.unique_alt,
            &other.conflicting_alts,
            &other.has_semantic_context,
            &other.dips_into_outer_context,
        );
        mine == theirs
    }
}

// Marker impl: `Eq` adds no methods, it only declares that the `PartialEq`
// implementation for `ATNConfigSet` is a full equivalence relation.
impl Eq for ATNConfigSet {}

impl Hash for ATNConfigSet {
fn hash<H: Hasher>(&self, state: &mut H) {
// if self.cached_hash.get() == 0 {
Expand All @@ -94,17 +119,19 @@ impl ATNConfigSet {
has_semantic_context: false,
read_only: false,
unique_alt: 0,
hasher: Self::atn_config_local_hash
}
}

// Stub: not implemented yet. Calling it panics via `unimplemented!()`.
fn hash_code_configs(&self) -> isize {
unimplemented!()
}

fn new_ordered_atnconfig_set() -> ATNConfigSet {
let a = ATNConfigSet::new_base_atnconfig_set(true);
// a.config_lookup =
unimplemented!();
pub fn new_ordered_atnconfig_set() -> ATNConfigSet {
let mut a = ATNConfigSet::new_base_atnconfig_set(true);

a.hasher = Self::atn_config_full_hash;
// unimplemented!();
a
}

Expand All @@ -116,16 +143,26 @@ impl ATNConfigSet {
//impl ATNConfigSet for BaseATNConfigSet {

// fn add(&self, config: ATNConfig, mergeCache: * DoubleDict) -> bool { unimplemented!() }
fn atn_config_local_hash(config: &ATNConfig) -> u64 {
/// Lookup hash that takes the whole config into account: ATN state, alt,
/// prediction context and semantic context.
///
/// The context and the semantic context are fed into one `MurmurHasher`;
/// its result is then folded together with the state and the alt using the
/// `h = 31 * h + part` scheme, starting from 7.
///
/// The folding uses wrapping arithmetic. Plain `*`/`+` on `u64` panics on
/// overflow in debug builds, and overflow is reachable here: `get_alt() as u64`
/// sign-extends a negative alt, and `Hasher::finish` may return any `u64`.
/// Whenever the unchecked form did not overflow the result is unchanged, and
/// release builds already wrapped.
fn atn_config_full_hash(config: &ATNConfig) -> u64 {
    let mut hasher = MurmurHasher::default();
    config.get_context().hash(&mut hasher);
    config.get_semantic_context().hash(&mut hasher);

    let parts = [
        config.get_state() as u64,
        config.get_alt() as u64,
        hasher.finish(),
    ];
    parts
        .iter()
        .fold(7u64, |acc, &part| acc.wrapping_mul(31).wrapping_add(part))
}


/// Lookup hash built from the ATN state, the alt and the semantic context.
///
/// Unlike `atn_config_full_hash`, the prediction context is not part of the
/// hash, so configs that differ only in their context get the same value.
///
/// The parts are folded with the `h = 31 * h + part` scheme, starting from 7,
/// using wrapping arithmetic. Plain `*`/`+` on `u64` panics on overflow in
/// debug builds, and overflow is reachable here: `get_alt() as u64`
/// sign-extends a negative alt, and `Hasher::finish` may return any `u64`.
/// Whenever the unchecked form did not overflow the result is unchanged, and
/// release builds already wrapped.
fn atn_config_local_hash(config: &ATNConfig) -> u64 {
    let mut hasher = MurmurHasher::default();
    config.get_semantic_context().hash(&mut hasher);

    let parts = [
        config.get_state() as u64,
        config.get_alt() as u64,
        hasher.finish(),
    ];
    parts
        .iter()
        .fold(7u64, |acc, &part| acc.wrapping_mul(31).wrapping_add(part))
}
Expand All @@ -136,15 +173,18 @@ impl ATNConfigSet {
merge_cache: Option<&mut MergeCache>,
) -> bool {
assert!(!self.read_only);
//todo semantic context
if config.get_semantic_context().is_some() {

if config.get_semantic_context().is_some() && *config.get_semantic_context().unwrap() != SemanticContext::NONE {
self.has_semantic_context = true
}

// assert!(config.get_context().unwrap().is_consistent());

if config.get_reaches_into_outer_context() > 0 {
self.dips_into_outer_context = true
}
let hash = Self::atn_config_local_hash(config.as_ref());
let hasher = self.hasher;
let hash = hasher(config.as_ref());

if let Some(existing) = self.config_lookup.get(&hash) {
let existing = self.configs.get_mut(*existing).unwrap().as_mut();
Expand All @@ -154,13 +194,14 @@ impl ATNConfigSet {
config.take_context(),
root_is_wildcard,
);

merged.calc_hash();

existing.set_reaches_into_outer_context(
max(existing.get_reaches_into_outer_context(), config.get_reaches_into_outer_context())
);

if config.get_precedence_filter_suppressed() {
if config.is_precedence_filter_suppressed() {
existing.set_precedence_filter_suppressed(true)
}

Expand Down Expand Up @@ -243,9 +284,15 @@ impl ATNConfigSet {
self.full_ctx
}

pub fn get_conflicting_alts(&self) -> &BitSet { &self.conflicting_alts }

pub fn set_conflicting_alts(&mut self, v: BitSet) { self.conflicting_alts = v }
//duplicate of the self.conflicting_alts???
/// Builds a fresh `BitSet` containing the alt number of every config
/// currently stored in this set.
pub fn get_alts(&self) -> BitSet {
    let mut alts = BitSet::new();
    for config in self.configs.iter() {
        alts.insert(config.get_alt() as usize);
    }
    alts
}

pub fn get_unique_alt(&self) -> isize {
self.unique_alt
Expand All @@ -256,10 +303,10 @@ impl ATNConfigSet {
}

pub fn get_dips_into_outer_context(&self) -> bool {
unimplemented!()
self.dips_into_outer_context
}

pub fn set_dips_into_outer_context(&self, _v: bool) {
unimplemented!()
pub fn set_dips_into_outer_context(&mut self, _v: bool) {
self.dips_into_outer_context = _v
}
}
Loading

0 comments on commit 67eb97b

Please sign in to comment.