Skip to content

Commit

Permalink
wip zerocopy x2, lib compiles successfully
Browse files Browse the repository at this point in the history
  • Loading branch information
rrevenantt committed Apr 5, 2020
1 parent 97cb6f8 commit 466b370
Show file tree
Hide file tree
Showing 13 changed files with 241 additions and 128 deletions.
1 change: 1 addition & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ fn main() {
}

println!("cargo:rerun-if-changed=build.rs");

println!("cargo:rerun-if-changed=/home/rrevenantt/dev/antlr4/tool/target/antlr4-4.8-2-SNAPSHOT-complete.jar");
}

Expand Down
80 changes: 61 additions & 19 deletions src/common_token_factory.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,32 @@
use std::borrow::{Borrow, BorrowMut};
use std::borrow::Cow::{Borrowed, Owned};
use std::marker::Unsize;
use std::cell::Cell;
use std::marker::{PhantomData, Unsize};
use std::ops::CoerceUnsized;

use typed_arena::Arena;

use crate::char_stream::CharStream;
use crate::token::{CommonToken, OwningToken, TOKEN_INVALID_TYPE};
use crate::token::Token;

lazy_static! {
pub static ref CommonTokenFactoryDEFAULT: Box<CommonTokenFactory> =
Box::new(CommonTokenFactory::new());
pub static ref INVALID_TOKEN:Box<OwningToken> = CommonTokenFactoryDEFAULT.as_ref().create(None,TOKEN_INVALID_TYPE,None,0,-1,-1,-1,-1).to_owned();
Box::new(CommonTokenFactory{});
}

thread_local! {
pub static INVALID_TOKEN:Box<OwningToken> = CommonTokenFactoryDEFAULT.as_ref().create(None,TOKEN_INVALID_TYPE,None,0,-1,-1,-1,-1).to_owned();
}

/// This is a trait for creating tokens
pub trait TokenFactory<'a>: Sync {
// todo remove redundant allocation for arenas

/// Trait for creating tokens
pub trait TokenFactory<'a> {
/// type of tokens emitted by this factory
type Tok: Token + ?Sized + Unsize<dyn Token + 'a> + 'a;
type Inner: Token + ?Sized + Unsize<dyn Token + 'a> + 'a;
type Tok: Borrow<Self::Inner> + Clone;

fn create<'b: 'a>(&'a self,
source: Option<&mut dyn CharStream<'b>>,
ttype: isize,
Expand All @@ -25,14 +36,15 @@ pub trait TokenFactory<'a>: Sync {
stop: isize,
line: isize,
column: isize,
) -> Box<Self::Tok>;
) -> Self::Tok;
}

#[derive(Default)]
pub struct CowTokenFactory;

impl<'a> TokenFactory<'a> for CowTokenFactory {
type Tok = CommonToken<'a>;
type Inner = CommonToken<'a>;
type Tok = Box<Self::Inner>;

fn create<'b: 'a>(&'a self,
source: Option<&mut dyn CharStream<'b>>,
Expand All @@ -43,7 +55,7 @@ impl<'a> TokenFactory<'a> for CowTokenFactory {
stop: isize,
line: isize,
column: isize,
) -> Box<Self::Tok> {
) -> Self::Tok {
let text = match (text, source) {
(Some(t), _) => Owned(t),

Expand All @@ -58,7 +70,7 @@ impl<'a> TokenFactory<'a> for CowTokenFactory {
channel,
start,
stop,
token_index: -1,
token_index: Cell::new(-1),
line,
column,
text,
Expand All @@ -71,7 +83,8 @@ impl<'a> TokenFactory<'a> for CowTokenFactory {
pub struct CommonTokenFactory {}

impl<'a> TokenFactory<'a> for CommonTokenFactory {
type Tok = OwningToken;
type Inner = OwningToken;
type Tok = Box<Self::Inner>;

fn create<'b: 'a>(&'a self,
source: Option<&mut dyn CharStream<'b>>,
Expand All @@ -82,7 +95,7 @@ impl<'a> TokenFactory<'a> for CommonTokenFactory {
stop: isize,
line: isize,
column: isize,
) -> Box<Self::Tok> {
) -> Self::Tok {
let text = match (text, source) {
(Some(t), _) => t,

Expand All @@ -97,7 +110,7 @@ impl<'a> TokenFactory<'a> for CommonTokenFactory {
channel,
start,
stop,
token_index: -1,
token_index: Cell::new(-1),
line,
column,
text,
Expand All @@ -106,12 +119,41 @@ impl<'a> TokenFactory<'a> for CommonTokenFactory {
}
}

impl CommonTokenFactory {
pub fn new() -> CommonTokenFactory {
CommonTokenFactory {}
}
// pub struct DynFactory<'input,TF:TokenFactory<'input>>(TF) where TF::Tok:CoerceUnsized<Box<dyn Token+'input>>;
// impl <'input,TF:TokenFactory<'input>> TokenFactory<'input> for DynFactory<'input,TF>
// where TF::Tok:CoerceUnsized<Box<dyn Token+'input>>
// {
//
// }

/// [`ArenaFactory`] wrapping a [`CommonTokenFactory`]; its arena stores [`OwningToken`] values.
pub type ArenaCommonFactory<'a> = ArenaFactory<'a, CommonTokenFactory, OwningToken>;
/// [`ArenaFactory`] wrapping a [`CowTokenFactory`]; its arena stores [`CommonToken`] values.
pub type ArenaCowFactory<'a> = ArenaFactory<'a, CowTokenFactory, CommonToken<'a>>;

/// Wrapper around a token factory that allocates the tokens it creates in a separate arena.
/// The intent is to significantly improve performance by allowing token references
/// to be passed everywhere.
// The wrapped factory must produce `Box<T>` because `Box` is what a token factory
// should almost always use.
pub struct ArenaFactory<'input, TF: TokenFactory<'input, Tok=Box<T>, Inner=T>, T: Token + Clone + 'input> {
/// Arena that holds the tokens of type `T`.
arena: Arena<T>,
/// Wrapped factory; its `Tok` type is `Box<T>`.
factory: TF,
/// Zero-sized marker. NOTE(review): appears to exist only to tie the struct to `'input` — confirm.
pd: PhantomData<&'input str>,
}

impl<'input, TF: TokenFactory<'input, Tok=Box<T>, Inner=T>, T: Token + Clone + 'input> TokenFactory<'input> for ArenaFactory<'input, TF, T> {
type Inner = T;
type Tok = &'input T;

fn create_thin(&self, _ttype: isize, _text: String) -> Box<dyn Token> {
unimplemented!()
fn create<'b: 'input>(&'input self,
source: Option<&mut dyn CharStream<'b>>,
ttype: isize,
text: Option<String>,
channel: isize,
start: isize,
stop: isize,
line: isize,
column: isize,
) -> Self::Tok {
let token = self.factory
.create(source, ttype, text, channel, start, stop, line, column);
self.arena.alloc(*token)
}
}
24 changes: 15 additions & 9 deletions src/common_token_stream.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::borrow::Borrow;
use std::ops::Deref;

use crate::common_token_factory::TokenFactory;
use crate::errors::ANTLRError;
use crate::int_stream::{EOF, IntStream, IterWrapper};
use crate::token::{OwningToken, Token, TOKEN_DEFAULT_CHANNEL, TOKEN_INVALID_TYPE};
Expand Down Expand Up @@ -50,9 +52,9 @@ impl<'input, T: TokenSource<'input>> IntStream for CommonTokenStream<'input, T>
}

impl<'input, T: TokenSource<'input>> TokenStream<'input> for CommonTokenStream<'input, T> {
type Tok = T::Tok;
type TF = T::TF;

fn lt(&mut self, k: isize) -> Option<&Self::Tok> {
fn lt(&mut self, k: isize) -> Option<&<Self::TF as TokenFactory<'input>>::Inner> {
if k == 0 { panic!(); }
if k < 0 { return self.lb(-k); }
let mut i = self.base.p;
Expand All @@ -66,14 +68,18 @@ impl<'input, T: TokenSource<'input>> TokenStream<'input> for CommonTokenStream<'
n += 1;
}
// if ( i>range ) range = i;
return self.base.tokens.get(i as usize).map(Deref::deref)
return self.base.tokens.get(i as usize).map(Borrow::borrow)
}

fn get(&self, index: isize) -> &Self::Tok {
fn get(&self, index: isize) -> &<Self::TF as TokenFactory<'input>>::Inner {
self.base.get(index)
}

fn get_token_source(&self) -> &dyn TokenSource<'input, Tok=Self::Tok> {
fn get_cloned(&self, index: isize) -> <Self::TF as TokenFactory<'input>>::Tok {
self.base.get_cloned(index)
}

fn get_token_source(&self) -> &dyn TokenSource<'input, TF=Self::TF> {
self.base.get_token_source()
}

Expand Down Expand Up @@ -142,15 +148,15 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> {
return self.size() - 1;
}

let mut token = self.base.tokens[i as usize].as_ref();
let mut token = self.base.tokens[i as usize].borrow();
while token.get_channel() != channel {
if token.get_token_type() == EOF || i < 0 {
return i;
}

i += direction;
self.sync(i);
token = self.base.tokens[i as usize].as_ref();
token = self.base.tokens[i as usize].borrow();
}

return i;
Expand Down Expand Up @@ -178,7 +184,7 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> {
//
// fn adjust_seek_index(&self, i: isize) -> int { unimplemented!() }

fn lb(&mut self, k: isize) -> Option<&T::Tok> {
fn lb(&mut self, k: isize) -> Option<&<<Self as TokenStream<'input>>::TF as TokenFactory<'input>>::Inner> {
if k == 0 || (self.base.p - k) < 0 { return None }

let mut i = self.base.p;
Expand All @@ -191,7 +197,7 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> {
}
if i < 0 { return None }

return Some(self.get(i));
return Some(self.get(i).borrow());
}

// fn get_number_of_on_channel_tokens(&self) -> int { unimplemented!() }
Expand Down
8 changes: 7 additions & 1 deletion src/error_strategy.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Borrow;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::fmt;
Expand All @@ -6,6 +7,7 @@ use std::rc::Rc;

use crate::atn_simulator::IATNSimulator;
use crate::atn_state::*;
use crate::common_token_factory::TokenFactory;
use crate::dfa::ScopeExt;
use crate::errors::{ANTLRError, FailedPredicateError, InputMisMatchError, NoViableAltError, RecognitionError};
use crate::interval_set::IntervalSet;
Expand Down Expand Up @@ -165,7 +167,10 @@ impl DefaultErrorStrategy {
// let look_back =
let mut curr = recognizer.get_current_token();
if curr.get_token_type() == TOKEN_EOF {
curr = recognizer.get_input_stream().run(|it| it.get((it.index() - 1).max(0)));
curr = recognizer.get_input_stream()
.run(|it|
it.get((it.index() - 1).max(0))
);
}
let (line, column) = (curr.get_line(), curr.get_column());
*recognizer.get_token_factory()
Expand All @@ -179,6 +184,7 @@ impl DefaultErrorStrategy {
line,
column,
)
// Token::to_owned(token.borrow())
// .modify_with(|it| it.text = token_text)
}

Expand Down
32 changes: 16 additions & 16 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ pub trait Lexer<'input>: TokenSource<'input> + Recognizer {
/// **! Usually generated by ANTLR !**
///
/// This trait combines everything that can be used to extend Lexer behavior
pub trait LexerRecog: Recognizer + Actions + Sized + 'static {
pub trait LexerRecog<T>: Recognizer + Actions<T> + Sized + 'static {
/// Callback to extend emit behavior
fn before_emit(_lexer: &mut <Self as Actions>::Recog) {}
fn before_emit(_lexer: &mut T) {}
}

pub struct BaseLexer<'input,
T: LexerRecog<Recog=Self> + 'static,
T: LexerRecog<Self> + 'static,
TF: TokenFactory<'input> = CommonTokenFactory
> {
pub interpreter: Option<LexerATNSimulator>,
Expand All @@ -72,7 +72,7 @@ pub struct BaseLexer<'input,
pub token_start_column: isize,
current_pos: Rc<LexerPosition>,
pub token_type: isize,
pub token: Option<Box<TF::Tok>>,
pub token: Option<TF::Tok>,
hit_eof: bool,
pub channel: isize,
mode_stack: Vec<usize>,
Expand All @@ -86,15 +86,15 @@ pub(crate) struct LexerPosition {
}

impl<'input, T, TF> Recognizer for BaseLexer<'input, T, TF>
where T: LexerRecog<Recog=Self> + 'static,
where T: LexerRecog<Self> + 'static,
TF: TokenFactory<'input>
{
fn sempred(&mut self, _localctx: &dyn ParserRuleContext, rule_index: isize, action_index: isize) -> bool {
<T as Actions>::sempred(_localctx, rule_index, action_index, self)
<T as Actions<Self>>::sempred(_localctx, rule_index, action_index, self)
}

fn action(&mut self, _localctx: &dyn ParserRuleContext, rule_index: isize, action_index: isize) {
<T as Actions>::action(_localctx, rule_index, action_index, self)
<T as Actions<Self>>::action(_localctx, rule_index, action_index, self)
}
}

Expand All @@ -108,15 +108,15 @@ pub const LEXER_MIN_CHAR_VALUE: isize = 0x0000;
pub const LEXER_MAX_CHAR_VALUE: isize = 0x10FFFF;

impl<'input, 'tokens, T, TF> BaseLexer<'input, T, TF>
where T: LexerRecog<Recog=Self> + 'static,
where T: LexerRecog<Self> + 'static,
TF: TokenFactory<'input>
{
fn emit_token(&mut self, token: Box<TF::Tok>) {
fn emit_token(&mut self, token: TF::Tok) {
self.token = Some(token);
}

fn emit(&mut self) {
<T as LexerRecog>::before_emit(self);
<T as LexerRecog<Self>>::before_emit(self);
let stop = self.get_char_index() - 1;
let token = self.factory.create(
Some(self.input.as_mut().unwrap().as_mut()),
Expand Down Expand Up @@ -215,13 +215,13 @@ impl<'input, 'tokens, T, TF> BaseLexer<'input, T, TF>
}

impl<'input, T, TF> TokenSource<'input> for BaseLexer<'input, T, TF>
where T: LexerRecog<Recog=Self> + 'static,
where T: LexerRecog<Self> + 'static,
TF: TokenFactory<'input>
{
type Tok = TF::Tok;
type TF = TF;

#[allow(unused_labels)]
fn next_token(&mut self) -> Box<Self::Tok> {
fn next_token(&mut self) -> <Self::TF as TokenFactory<'input>>::Tok {
assert!(self.input.is_some());

let _marker = self.input.as_mut().unwrap().mark();
Expand Down Expand Up @@ -312,13 +312,13 @@ impl<'input, T, TF> TokenSource<'input> for BaseLexer<'input, T, TF>
// self.factory = f;
// }

fn get_token_factory(&self) -> &'input dyn TokenFactory<'input, Tok=Self::Tok> {
fn get_token_factory(&self) -> &'input TF {
self.factory
}
}

fn notify_listeners<'input, T, TF>(_liseners: &mut Vec<Box<dyn ErrorListener>>, e: &ANTLRError, lexer: &BaseLexer<'input, T, TF>)
where T: LexerRecog<Recog=BaseLexer<'input, T, TF>> + 'static,
where T: LexerRecog<BaseLexer<'input, T, TF>> + 'static,
TF: TokenFactory<'input>
{
let text = format!("token recognition error at: '{}'", lexer.input.as_ref().unwrap().get_text(lexer.token_start_char_index, lexer.get_char_index()));
Expand All @@ -329,7 +329,7 @@ fn notify_listeners<'input, T, TF>(_liseners: &mut Vec<Box<dyn ErrorListener>>,


impl<'input, T, TF> Lexer<'input> for BaseLexer<'input, T, TF>
where T: LexerRecog<Recog=Self> + 'static,
where T: LexerRecog<Self> + 'static,
TF: TokenFactory<'input>
{
fn set_channel(&mut self, v: isize) {
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#![feature(specialization)]
#![feature(coerce_unsized)]
#![feature(unsize)]
#![feature(associated_type_defaults)]
#![warn(rust_2018_idioms)]
#![warn(missing_docs)] // warn if there is missing docs
#![warn(missing_debug_implementations)]
Expand Down
Loading

0 comments on commit 466b370

Please sign in to comment.