From 466b370dcc22904aa57193c3f84ec4e4846f12f1 Mon Sep 17 00:00:00 2001 From: Konstantin Anisimov Date: Mon, 6 Apr 2020 00:24:37 +0300 Subject: [PATCH] wip zerocopy x2, lib compiles successfully --- build.rs | 1 + src/common_token_factory.rs | 80 ++++++++++++++++++++++++++++--------- src/common_token_stream.rs | 24 ++++++----- src/error_strategy.rs | 8 +++- src/lexer.rs | 32 +++++++-------- src/lib.rs | 1 + src/parser.rs | 59 ++++++++++++++------------- src/parser_atn_simulator.rs | 9 +++-- src/parser_rule_context.rs | 9 +++-- src/recognizer.rs | 4 +- src/token.rs | 69 +++++++++++++++++++++++++------- src/token_source.rs | 14 ++++--- src/token_stream.rs | 59 +++++++++++++++------------ 13 files changed, 241 insertions(+), 128 deletions(-) diff --git a/build.rs b/build.rs index bd1fcfc..c7933bb 100644 --- a/build.rs +++ b/build.rs @@ -17,6 +17,7 @@ fn main() { } println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=/home/rrevenantt/dev/antlr4/tool/target/antlr4-4.8-2-SNAPSHOT-complete.jar"); } diff --git a/src/common_token_factory.rs b/src/common_token_factory.rs index 088d4e3..6d02e06 100644 --- a/src/common_token_factory.rs +++ b/src/common_token_factory.rs @@ -1,21 +1,32 @@ +use std::borrow::{Borrow, BorrowMut}; use std::borrow::Cow::{Borrowed, Owned}; -use std::marker::Unsize; +use std::cell::Cell; +use std::marker::{PhantomData, Unsize}; use std::ops::CoerceUnsized; +use typed_arena::Arena; + use crate::char_stream::CharStream; use crate::token::{CommonToken, OwningToken, TOKEN_INVALID_TYPE}; use crate::token::Token; lazy_static! { pub static ref CommonTokenFactoryDEFAULT: Box = - Box::new(CommonTokenFactory::new()); - pub static ref INVALID_TOKEN:Box = CommonTokenFactoryDEFAULT.as_ref().create(None,TOKEN_INVALID_TYPE,None,0,-1,-1,-1,-1).to_owned(); + Box::new(CommonTokenFactory{}); +} + +thread_local! { + pub static INVALID_TOKEN:Box = CommonTokenFactoryDEFAULT.as_ref().create(None,TOKEN_INVALID_TYPE,None,0,-1,-1,-1,-1).to_owned(); } -/// This is a trait for creating tokens -pub trait TokenFactory<'a>: Sync { +// todo remove redundant allocation for arenas + +/// Trait for creating tokens +pub trait TokenFactory<'a> { /// type of tokens emitted by this factory - type Tok: Token + ?Sized + Unsize + 'a; + type Inner: Token + ?Sized + Unsize + 'a; + type Tok: Borrow + Clone; + fn create<'b: 'a>(&'a self, source: Option<&mut dyn CharStream<'b>>, ttype: isize, @@ -25,14 +36,15 @@ pub trait TokenFactory<'a>: Sync { stop: isize, line: isize, column: isize, - ) -> Box; + ) -> Self::Tok; } #[derive(Default)] pub struct CowTokenFactory; impl<'a> TokenFactory<'a> for CowTokenFactory { - type Tok = CommonToken<'a>; + type Inner = CommonToken<'a>; + type Tok = Box; fn create<'b: 'a>(&'a self, source: Option<&mut dyn CharStream<'b>>, @@ -43,7 +55,7 @@ impl<'a> TokenFactory<'a> for CowTokenFactory { stop: isize, line: isize, column: isize, - ) -> Box { + ) -> Self::Tok { let text = match (text, source) { (Some(t), _) => Owned(t), @@ -58,7 +70,7 @@ impl<'a> TokenFactory<'a> for CowTokenFactory { channel, start, stop, - token_index: -1, + token_index: Cell::new(-1), line, column, text, @@ -71,7 +83,8 @@ impl<'a> TokenFactory<'a> for CowTokenFactory { pub struct CommonTokenFactory {} impl<'a> TokenFactory<'a> for CommonTokenFactory { - type Tok = OwningToken; + type Inner = OwningToken; + type Tok = Box; fn create<'b: 'a>(&'a self, source: Option<&mut dyn CharStream<'b>>, @@ -82,7 +95,7 @@ impl<'a> TokenFactory<'a> for CommonTokenFactory { stop: isize, line: isize, column: isize, - ) -> Box { + ) -> Self::Tok { let text = match (text, source) { (Some(t), _) => t, @@ -97,7 +110,7 @@ impl<'a> TokenFactory<'a> for CommonTokenFactory { channel, start, stop, - token_index: -1, + token_index: Cell::new(-1), line, column, text, @@ -106,12 +119,41 @@ impl<'a> TokenFactory<'a> for CommonTokenFactory { } } -impl CommonTokenFactory { - pub fn new() -> CommonTokenFactory { - CommonTokenFactory {} - } +// pub struct DynFactory<'input,TF:TokenFactory<'input>>(TF) where TF::Tok:CoerceUnsized>; +// impl <'input,TF:TokenFactory<'input>> TokenFactory<'input> for DynFactory<'input,TF> +// where TF::Tok:CoerceUnsized> +// { +// +// } + +pub type ArenaCommonFactory<'a> = ArenaFactory<'a, CommonTokenFactory, OwningToken>; +pub type ArenaCowFactory<'a> = ArenaFactory<'a, CowTokenFactory, CommonToken<'a>>; + +/// This is a wrapper for Token factory that allows to allocate tokens in separate arena. +/// It will allow to significantly improve performance by passing Token references everywhere. +// Box is used here because it is almost always should be used for token factory +pub struct ArenaFactory<'input, TF: TokenFactory<'input, Tok=Box, Inner=T>, T: Token + Clone + 'input> { + arena: Arena, + factory: TF, + pd: PhantomData<&'input str>, +} + +impl<'input, TF: TokenFactory<'input, Tok=Box, Inner=T>, T: Token + Clone + 'input> TokenFactory<'input> for ArenaFactory<'input, TF, T> { + type Inner = T; + type Tok = &'input T; - fn create_thin(&self, _ttype: isize, _text: String) -> Box { - unimplemented!() + fn create<'b: 'input>(&'input self, + source: Option<&mut dyn CharStream<'b>>, + ttype: isize, + text: Option, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok { + let token = self.factory + .create(source, ttype, text, channel, start, stop, line, column); + self.arena.alloc(*token) } } diff --git a/src/common_token_stream.rs b/src/common_token_stream.rs index b8310b4..eb80273 100644 --- a/src/common_token_stream.rs +++ b/src/common_token_stream.rs @@ -1,5 +1,7 @@ +use std::borrow::Borrow; use std::ops::Deref; +use crate::common_token_factory::TokenFactory; use crate::errors::ANTLRError; use crate::int_stream::{EOF, IntStream, IterWrapper}; use crate::token::{OwningToken, Token, TOKEN_DEFAULT_CHANNEL, TOKEN_INVALID_TYPE}; @@ -50,9 +52,9 @@ impl<'input, T: TokenSource<'input>> IntStream for CommonTokenStream<'input, T> } impl<'input, T: TokenSource<'input>> TokenStream<'input> for CommonTokenStream<'input, T> { - type Tok = T::Tok; + type TF = T::TF; - fn lt(&mut self, k: isize) -> Option<&Self::Tok> { + fn lt(&mut self, k: isize) -> Option<&>::Inner> { if k == 0 { panic!(); } if k < 0 { return self.lb(-k); } let mut i = self.base.p; @@ -66,14 +68,18 @@ impl<'input, T: TokenSource<'input>> TokenStream<'input> for CommonTokenStream<' n += 1; } // if ( i>range ) range = i; - return self.base.tokens.get(i as usize).map(Deref::deref) + return self.base.tokens.get(i as usize).map(Borrow::borrow) } - fn get(&self, index: isize) -> &Self::Tok { + fn get(&self, index: isize) -> &>::Inner { self.base.get(index) } - fn get_token_source(&self) -> &dyn TokenSource<'input, Tok=Self::Tok> { + fn get_cloned(&self, index: isize) -> >::Tok { + self.base.get_cloned(index) + } + + fn get_token_source(&self) -> &dyn TokenSource<'input, TF=Self::TF> { self.base.get_token_source() } @@ -142,7 +148,7 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> { return self.size() - 1; } - let mut token = self.base.tokens[i as usize].as_ref(); + let mut token = self.base.tokens[i as usize].borrow(); while token.get_channel() != channel { if token.get_token_type() == EOF || i < 0 { return i; @@ -150,7 +156,7 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> { i += direction; self.sync(i); - token = self.base.tokens[i as usize].as_ref(); + token = self.base.tokens[i as usize].borrow(); } return i; @@ -178,7 +184,7 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> { // // fn adjust_seek_index(&self, i: isize) -> int { unimplemented!() } - fn lb(&mut self, k: isize) -> Option<&T::Tok> { + fn lb(&mut self, k: isize) -> Option<&<>::TF as TokenFactory<'input>>::Inner> { if k == 0 || (self.base.p - k) < 0 { return None } let mut i = self.base.p; @@ -191,7 +197,7 @@ impl<'input, T: TokenSource<'input>> CommonTokenStream<'input, T> { } if i < 0 { return None } - return Some(self.get(i)); + return Some(self.get(i).borrow()); } // fn get_number_of_on_channel_tokens(&self) -> int { unimplemented!() } diff --git a/src/error_strategy.rs b/src/error_strategy.rs index a48477d..1a6be8a 100644 --- a/src/error_strategy.rs +++ b/src/error_strategy.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::error::Error; use std::fmt::{Display, Formatter}; use std::fmt; @@ -6,6 +7,7 @@ use std::rc::Rc; use crate::atn_simulator::IATNSimulator; use crate::atn_state::*; +use crate::common_token_factory::TokenFactory; use crate::dfa::ScopeExt; use crate::errors::{ANTLRError, FailedPredicateError, InputMisMatchError, NoViableAltError, RecognitionError}; use crate::interval_set::IntervalSet; @@ -165,7 +167,10 @@ impl DefaultErrorStrategy { // let look_back = let mut curr = recognizer.get_current_token(); if curr.get_token_type() == TOKEN_EOF { - curr = recognizer.get_input_stream().run(|it| it.get((it.index() - 1).max(0))); + curr = recognizer.get_input_stream() + .run(|it| + it.get((it.index() - 1).max(0)) + ); } let (line, column) = (curr.get_line(), curr.get_column()); *recognizer.get_token_factory() @@ -179,6 +184,7 @@ impl DefaultErrorStrategy { line, column, ) + // Token::to_owned(token.borrow()) // .modify_with(|it| it.text = token_text) } diff --git a/src/lexer.rs b/src/lexer.rs index e93983a..65d3b2d 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -50,13 +50,13 @@ pub trait Lexer<'input>: TokenSource<'input> + Recognizer { /// **! Usually generated by ANTLR !** /// /// This trait combines everything that can be used to extend Lexer behavior -pub trait LexerRecog: Recognizer + Actions + Sized + 'static { +pub trait LexerRecog: Recognizer + Actions + Sized + 'static { /// Callback to extend emit behavior - fn before_emit(_lexer: &mut ::Recog) {} + fn before_emit(_lexer: &mut T) {} } pub struct BaseLexer<'input, - T: LexerRecog + 'static, + T: LexerRecog + 'static, TF: TokenFactory<'input> = CommonTokenFactory > { pub interpreter: Option, @@ -72,7 +72,7 @@ pub struct BaseLexer<'input, pub token_start_column: isize, current_pos: Rc, pub token_type: isize, - pub token: Option>, + pub token: Option, hit_eof: bool, pub channel: isize, mode_stack: Vec, @@ -86,15 +86,15 @@ pub(crate) struct LexerPosition { } impl<'input, T, TF> Recognizer for BaseLexer<'input, T, TF> - where T: LexerRecog + 'static, + where T: LexerRecog + 'static, TF: TokenFactory<'input> { fn sempred(&mut self, _localctx: &dyn ParserRuleContext, rule_index: isize, action_index: isize) -> bool { - ::sempred(_localctx, rule_index, action_index, self) + >::sempred(_localctx, rule_index, action_index, self) } fn action(&mut self, _localctx: &dyn ParserRuleContext, rule_index: isize, action_index: isize) { - ::action(_localctx, rule_index, action_index, self) + >::action(_localctx, rule_index, action_index, self) } } @@ -108,15 +108,15 @@ pub const LEXER_MIN_CHAR_VALUE: isize = 0x0000; pub const LEXER_MAX_CHAR_VALUE: isize = 0x10FFFF; impl<'input, 'tokens, T, TF> BaseLexer<'input, T, TF> - where T: LexerRecog + 'static, + where T: LexerRecog + 'static, TF: TokenFactory<'input> { - fn emit_token(&mut self, token: Box) { + fn emit_token(&mut self, token: TF::Tok) { self.token = Some(token); } fn emit(&mut self) { - ::before_emit(self); + >::before_emit(self); let stop = self.get_char_index() - 1; let token = self.factory.create( Some(self.input.as_mut().unwrap().as_mut()), @@ -215,13 +215,13 @@ impl<'input, 'tokens, T, TF> BaseLexer<'input, T, TF> } impl<'input, T, TF> TokenSource<'input> for BaseLexer<'input, T, TF> - where T: LexerRecog + 'static, + where T: LexerRecog + 'static, TF: TokenFactory<'input> { - type Tok = TF::Tok; + type TF = TF; #[allow(unused_labels)] - fn next_token(&mut self) -> Box { + fn next_token(&mut self) -> >::Tok { assert!(self.input.is_some()); let _marker = self.input.as_mut().unwrap().mark(); @@ -312,13 +312,13 @@ impl<'input, T, TF> TokenSource<'input> for BaseLexer<'input, T, TF> // self.factory = f; // } - fn get_token_factory(&self) -> &'input dyn TokenFactory<'input, Tok=Self::Tok> { + fn get_token_factory(&self) -> &'input TF { self.factory } } fn notify_listeners<'input, T, TF>(_liseners: &mut Vec>, e: &ANTLRError, lexer: &BaseLexer<'input, T, TF>) - where T: LexerRecog> + 'static, + where T: LexerRecog> + 'static, TF: TokenFactory<'input> { let text = format!("token recognition error at: '{}'", lexer.input.as_ref().unwrap().get_text(lexer.token_start_char_index, lexer.get_char_index())); @@ -329,7 +329,7 @@ fn notify_listeners<'input, T, TF>(_liseners: &mut Vec>, impl<'input, T, TF> Lexer<'input> for BaseLexer<'input, T, TF> - where T: LexerRecog + 'static, + where T: LexerRecog + 'static, TF: TokenFactory<'input> { fn set_channel(&mut self, v: isize) { diff --git a/src/lib.rs b/src/lib.rs index df9f503..291239b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ #![feature(specialization)] #![feature(coerce_unsized)] #![feature(unsize)] +#![feature(associated_type_defaults)] #![warn(rust_2018_idioms)] #![warn(missing_docs)] // warn if there is missing docs #![warn(missing_debug_implementations)] diff --git a/src/parser.rs b/src/parser.rs index 4ab2517..bb5b269 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,4 +1,5 @@ use std::any::Any; +use std::borrow::Borrow; use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::marker::{PhantomData, Unsize}; @@ -9,7 +10,7 @@ use std::sync::Arc; use crate::atn::ATN; use crate::atn_simulator::IATNSimulator; -use crate::common_token_factory::TokenFactory; +use crate::common_token_factory::{CommonTokenFactory, TokenFactory}; use crate::error_listener::{ConsoleErrorListener, ErrorListener, ProxyErrorListener}; use crate::error_strategy::ErrorStrategy; use crate::errors::ANTLRError; @@ -29,7 +30,7 @@ pub trait Parser<'input>: Recognizer { // type Tok: Token + ?Sized + Unsize + 'input; fn get_interpreter(&self) -> &ParserATNSimulator; - fn get_token_factory(&self) -> &'input dyn TokenFactory<'input, Tok=OwningToken>; + fn get_token_factory(&self) -> &'input CommonTokenFactory; fn get_parser_rule_context(&self) -> &ParserRuleContextType; // fn set_parser_rule_context(&self, v: ParserRuleContext); fn consume(&mut self, err_handler: &mut dyn ErrorStrategy); @@ -40,8 +41,8 @@ pub trait Parser<'input>: Recognizer { // fn get_error_handler(&self) -> ErrorStrategy; // fn set_error_handler(&self, e: ErrorStrategy); - fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, Tok=dyn Token + 'input>; - fn get_input_stream(&self) -> &dyn TokenStream<'input, Tok=dyn Token + 'input>; + fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, TF=CommonTokenFactory>; + fn get_input_stream(&self) -> &dyn TokenStream<'input, TF=CommonTokenFactory>; fn get_current_token(&self) -> &(dyn Token + 'input); fn get_expected_tokens(&self) -> IntervalSet; @@ -64,8 +65,8 @@ pub trait Parser<'input>: Recognizer { /// Generated parser hides complexity of this struct and expose required flexibility via generics /// pub struct BaseParser<'input, - Ext: ParserRecog + 'static, - I: TokenStream<'input, Tok=dyn Token + 'input>, + Ext: ParserRecog + 'static, + I: TokenStream<'input> + ?Sized = dyn TokenStream<'input, TF=CommonTokenFactory>, T: ParseTreeListener + ?Sized + 'static = dyn ParseTreeListener> { interp: Arc, pub ctx: Option, @@ -98,13 +99,14 @@ pub struct BaseParser<'input, _syntax_errors: Cell, error_listeners: RefCell>>, - ext: Ext + ext: Ext, + pd: PhantomData &'input str> } impl<'input, I, T, Ext> Deref for BaseParser<'input, Ext, I, T> where T: ParseTreeListener + ?Sized + 'static, - I: TokenStream<'input, Tok=dyn Token + 'input>, - Ext: ParserRecog + 'static + I: TokenStream<'input> + ?Sized, + Ext: ParserRecog + 'static { type Target = Ext; @@ -115,8 +117,8 @@ impl<'input, I, T, Ext> Deref for BaseParser<'input, Ext, I, T> impl<'input, I, T, Ext> DerefMut for BaseParser<'input, Ext, I, T> where T: ParseTreeListener + ?Sized + 'static, - I: TokenStream<'input, Tok=dyn Token + 'input>, - Ext: ParserRecog + 'static + I: TokenStream<'input> + ?Sized, + Ext: ParserRecog + 'static { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.ext @@ -124,15 +126,15 @@ impl<'input, I, T, Ext> DerefMut for BaseParser<'input, Ext, I, T> } /// -pub trait ParserRecog: Recognizer + Actions {} +pub trait ParserRecog: Recognizer + Actions {} impl<'input, I, T, Ext> Recognizer for BaseParser<'input, Ext, I, T> where T: ParseTreeListener + ?Sized + 'static, - I: TokenStream<'input, Tok=dyn Token + 'input>, - Ext: ParserRecog + 'static + I: TokenStream<'input> + ?Sized, + Ext: ParserRecog + 'static { fn sempred(&mut self, localctx: &dyn ParserRuleContext, rule_index: isize, action_index: isize) -> bool { - ::sempred(localctx, rule_index, action_index, self) + >::sempred(localctx, rule_index, action_index, self) } fn get_rule_names(&self) -> &[&str] { @@ -154,8 +156,8 @@ impl<'input, I, T, Ext> Recognizer for BaseParser<'input, Ext, I, T> impl<'input, I, T, Ext> Parser<'input> for BaseParser<'input, Ext, I, T> where T: ParseTreeListener + ?Sized + 'static, - I: TokenStream<'input, Tok=dyn Token + 'input>, - Ext: ParserRecog + 'static + I: TokenStream<'input> + ?Sized, + Ext: ParserRecog + 'static { // type Tok = I::Tok; @@ -163,7 +165,7 @@ impl<'input, I, T, Ext> Parser<'input> for BaseParser<'input, Ext, I, T> self.interp.as_ref() } - fn get_token_factory(&self) -> &'input dyn TokenFactory<'input, Tok=OwningToken> { + fn get_token_factory(&self) -> &'input CommonTokenFactory { &**crate::common_token_factory::CommonTokenFactoryDEFAULT // self.input.get_token_source().get_token_factory() } @@ -201,16 +203,18 @@ impl<'input, I, T, Ext> Parser<'input> for BaseParser<'input, Ext, I, T> precedence >= self.get_precedence() } - fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, Tok=dyn Token + 'input> { - self.input.as_mut() + fn get_input_stream_mut(&mut self) -> &mut dyn TokenStream<'input, TF=CommonTokenFactory> { + unimplemented!() + // self.input.as_mut() } - fn get_input_stream(&self) -> &dyn TokenStream<'input, Tok=dyn Token + 'input> { - self.input.as_ref() + fn get_input_stream(&self) -> &dyn TokenStream<'input, TF=CommonTokenFactory> { + unimplemented!() + // self.input.as_ref() } fn get_current_token(&self) -> &(dyn Token + 'input) { - self.input.get(self.input.index()) + self.input.get(self.input.index()).borrow() } fn get_expected_tokens(&self) -> IntervalSet { @@ -225,7 +229,7 @@ impl<'input, I, T, Ext> Parser<'input> for BaseParser<'input, Ext, I, T> self._syntax_errors.update(|it| it + 1); let offending_token = match offending_token { None => Some(self.get_current_token()), - Some(x) => Some(self.input.get(x)), + Some(x) => Some(self.input.get(x) as &dyn Token), }; let line = offending_token.map(|x| x.get_line()).unwrap_or(-1); let column = offending_token.map(|x| x.get_column()).unwrap_or(-1); @@ -279,8 +283,8 @@ impl<'input, I, T, Ext> Parser<'input> for BaseParser<'input, Ext, I, T> impl<'input, I, T, Ext> BaseParser<'input, Ext, I, T> where T: ParseTreeListener + ?Sized + 'static, - I: TokenStream<'input, Tok=dyn Token + 'input>, - Ext: ParserRecog + 'static + I: TokenStream<'input> + ?Sized, + Ext: ParserRecog + 'static { pub fn new_base_parser( input: Box, @@ -298,7 +302,8 @@ impl<'input, I, T, Ext> BaseParser<'input, Ext, I, T> parse_listeners: vec![], _syntax_errors: Cell::new(0), error_listeners: RefCell::new(vec![Box::new(ConsoleErrorListener {})]), - ext + ext, + pd: PhantomData } } diff --git a/src/parser_atn_simulator.rs b/src/parser_atn_simulator.rs index 3a3cca4..87d242e 100644 --- a/src/parser_atn_simulator.rs +++ b/src/parser_atn_simulator.rs @@ -17,6 +17,7 @@ use crate::atn_config_set::ATNConfigSet; use crate::atn_simulator::{BaseATNSimulator, IATNSimulator}; use crate::atn_state::{ATNDecisionState, ATNState, ATNSTATE_BLOCK_END, ATNStateRef, ATNStateType}; use crate::atn_state::ATNStateType::RuleStopState; +use crate::common_token_factory::CommonTokenFactory; use crate::dfa::{DFA, ScopeExt}; use crate::dfa_state::{DFAState, DFAStateRef, PredPrediction}; use crate::errors::{ANTLRError, NoViableAltError}; @@ -89,7 +90,7 @@ struct Local<'a, 'text: 'a> { } impl<'text> Local<'_, 'text> { - fn input(&mut self) -> &mut dyn TokenStream<'text, Tok=dyn Token + 'text> { self.parser.get_input_stream_mut() } + fn input(&mut self) -> &mut dyn TokenStream<'text, TF=CommonTokenFactory> { self.parser.get_input_stream_mut() } fn seek(&mut self, i: isize) { self.input().seek(i) } fn outer_context(&self) -> &dyn ParserRuleContext { self.outer_context.deref() } } @@ -1107,8 +1108,10 @@ impl ParserATNSimulator { // fn no_viable_alt(&self, local: &mut Local, _configs: &ATNConfigSet, start_index: isize) -> ANTLRError { - let start_token = local.parser.get_input_stream().get(start_index).to_owned(); - let offending_token = local.input().lt(1).unwrap().to_owned(); + let start_token = local.parser.get_input_stream().get(start_index); + let start_token = Token::to_owned(start_token); + let offending_token = local.input().lt(1).unwrap(); + let offending_token = Token::to_owned(offending_token); ANTLRError::NoAltError(NoViableAltError::new_full( local.parser, start_token, diff --git a/src/parser_rule_context.rs b/src/parser_rule_context.rs index b30afb5..3bfe2da 100644 --- a/src/parser_rule_context.rs +++ b/src/parser_rule_context.rs @@ -1,6 +1,7 @@ use std::any::{Any, type_name, TypeId}; use std::borrow::{Borrow, BorrowMut}; use std::cell::{Ref, RefCell}; +use std::convert::identity; use std::fmt::{Debug, Error, Formatter}; use std::ops::{Deref, DerefMut}; use std::rc::Rc; @@ -223,7 +224,7 @@ impl ParserRuleContext for BaseParserRuleContext { } fn set_start(&self, t: Option) { - *self.start.borrow_mut() = t.unwrap_or((**INVALID_TOKEN).clone()); + *self.start.borrow_mut() = t.unwrap_or(INVALID_TOKEN.with(|x| (**x).clone())); } fn get_start(&self) -> Ref<'_, OwningToken> { @@ -231,7 +232,7 @@ impl ParserRuleContext for BaseParserRuleContext { } fn set_stop(&self, t: Option) { - *self.stop.borrow_mut() = t.unwrap_or((**INVALID_TOKEN).clone()); + *self.stop.borrow_mut() = t.unwrap_or(INVALID_TOKEN.with(|x| (**x).clone())); } fn get_stop(&self) -> Ref<'_, OwningToken> { @@ -337,8 +338,8 @@ impl BaseParserRuleContext { pub fn new_parser_ctx(parent_ctx: Option, invoking_state: isize, ext: Ctx) -> Self { BaseParserRuleContext { base: BaseRuleContext::new_ctx(parent_ctx, invoking_state, ext), - start: RefCell::new((**INVALID_TOKEN).clone()), - stop: RefCell::new((**INVALID_TOKEN).clone()), + start: RefCell::new(INVALID_TOKEN.with(|x| (**x).clone())), + stop: RefCell::new(INVALID_TOKEN.with(|x| (**x).clone())), exception: None, children: RefCell::new(vec![]), } diff --git a/src/recognizer.rs b/src/recognizer.rs index 98cf1f9..653f4a9 100644 --- a/src/recognizer.rs +++ b/src/recognizer.rs @@ -42,13 +42,13 @@ pub trait Recognizer { pub trait Actions { type Recog: ?Sized; fn sempred(_localctx: &dyn ParserRuleContext, _rule_index: isize, _action_index: isize, - _recog: &mut Self::Recog, + _recog: &mut T, ) -> bool { true } fn action(_localctx: &dyn ParserRuleContext, _rule_index: isize, _action_index: isize, - _recog: &mut Self::Recog, + _recog: &mut T, ) {} } diff --git a/src/token.rs b/src/token.rs index e6b38f9..06e1812 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,7 +1,8 @@ -use std::borrow::{Borrow, Cow}; +use std::borrow::{Borrow, BorrowMut, Cow}; +use std::cell::Cell; use std::fmt::{Debug, Display}; use std::fmt::Formatter; -use std::ops::{CoerceUnsized, Deref}; +use std::ops::{CoerceUnsized, Deref, DerefMut}; use crate::char_stream::CharStream; use crate::int_stream::EOF; @@ -15,7 +16,12 @@ pub const TOKEN_DEFAULT_CHANNEL: isize = 0; pub const TOKEN_HIDDEN_CHANNEL: isize = 1; pub const HIDDEN: isize = TOKEN_HIDDEN_CHANNEL; - +/// Trait for custom token implementations +/// +/// For proper parsing, implementations must have valid implementations +/// for at least token type and token index. +/// +/// Other members are mostly required for error reporting pub trait Token: Debug { // fn get_source(&self) -> Option<(Box, Box)>; fn get_token_type(&self) -> isize; @@ -29,7 +35,7 @@ pub trait Token: Debug { fn set_text(&self, text: String); fn get_token_index(&self) -> isize; - fn set_token_index(&mut self, v: isize); + fn set_token_index(&self, v: isize); // fn get_token_source(&self) -> &dyn TokenSource; // fn get_input_stream(&self) -> &dyn CharStream; @@ -37,14 +43,46 @@ pub trait Token: Debug { fn to_owned(&self) -> OwningToken; } -// impl ToOwned for dyn Token{ -// type Owned = (); +///automatically implemented interface for passing tokens behind different kinds of ownership +// pub trait TokenWrapper<'ref>: BorrowMut<::Inner>{ +// type Inner:Token + ?Sized + 'ref; +// } // -// fn to_owned(&self) -> Self::Owned { -// unimplemented!() -// } +// impl<'a,T:Token + ?Sized + 'a> TokenWrapper<'a> for Box { +// type Inner = T; +// } +// +// impl<'a,T:Token + ?Sized + 'a> TokenWrapper<'a> for Box { +// type Inner = T; +// } +// +// impl<'a,T:Token + ?Sized + 'a> TokenWrapper<'a> for &'a mut T { +// type Inner = ; // } +// impl::Target>> Token for T where ::Target:Token{ +// fn get_token_type(&self) -> isize { self.deref().get_token_type() } +// +// fn get_channel(&self) -> isize { self.deref().get_channel() } +// +// fn get_start(&self) -> isize { self.deref().get_start() } +// +// fn get_stop(&self) -> isize { self.deref().get_stop() } +// +// fn get_line(&self) -> isize { self.deref().get_line() } +// +// fn get_column(&self) -> isize { self.deref().get_column() } +// +// fn get_text(&self) -> &str {self.deref().get_text()} +// +// fn set_text(&self, text: String) {self.deref().set_text()} +// +// fn get_token_index(&self) -> isize {self.deref().get_token_index()} +// +// fn set_token_index(&mut self, v: isize) {self.deref().set_token_index()} +// +// fn to_owned(&self) -> OwningToken {self.deref().to_owned()} +// } pub type OwningToken = GenericToken; pub type CommonToken<'a> = GenericToken>; @@ -56,7 +94,7 @@ pub struct GenericToken + Debug = String> { pub channel: isize, pub start: isize, pub stop: isize, - pub token_index: isize, + pub token_index: Cell, pub line: isize, pub column: isize, pub text: T, @@ -71,7 +109,7 @@ impl + Debug> Display for GenericToken { let txt = txt.replace("\t", "\\t"); // let txt = escape_whitespaces(txt,false); f.write_fmt(format_args!("[@{},{}:{}='{}',<{}>{},{}:{}]", - self.token_index, + self.token_index.get(), self.start, self.stop, txt, @@ -83,6 +121,7 @@ impl + Debug> Display for GenericToken { } } +// impl + Debug> TokenWrapper for GenericToken { type Inner = Self; } impl + Debug> Token for GenericToken { fn get_channel(&self) -> isize { @@ -114,11 +153,11 @@ impl + Debug> Token for GenericToken { // } fn get_token_index(&self) -> isize { - self.token_index + self.token_index.get() } - fn set_token_index(&mut self, _v: isize) { - self.token_index = _v + fn set_token_index(&self, _v: isize) { + self.token_index.set(_v) } // fn get_token_source(&self) -> &dyn TokenSource { @@ -147,7 +186,7 @@ impl + Debug> Token for GenericToken { channel: self.channel, start: self.start, stop: self.stop, - token_index: self.token_type, + token_index: self.token_index.clone(), line: self.line, column: self.column, text: self.text.borrow().to_owned(), diff --git a/src/token_source.rs b/src/token_source.rs index a1c9e84..78901cc 100644 --- a/src/token_source.rs +++ b/src/token_source.rs @@ -1,4 +1,6 @@ +use std::fmt::Debug; use std::marker::Unsize; +use std::ops::Deref; use crate::char_stream::CharStream; use crate::common_token_factory::TokenFactory; @@ -8,8 +10,8 @@ use crate::token::{Token, TOKEN_DEFAULT_CHANNEL}; /// Provides tokens for parser via `TokenStream` pub trait TokenSource<'input> { ///Type of tokens, produced by this source - type Tok: Token + ?Sized + Unsize + 'input; - fn next_token(&mut self) -> Box; + type TF: TokenFactory<'input>; + fn next_token(&mut self) -> >::Tok; /** * Get the line number for the current position in the input stream. The * first line in the input is line 1. @@ -35,15 +37,15 @@ pub trait TokenSource<'input> { /// Required by `Parser` for creating missing tokens. /// /// @return The {@link TokenFactory} currently used by this token source. - fn get_token_factory(&self) -> &'input dyn TokenFactory<'input, Tok=Self::Tok>; + fn get_token_factory(&self) -> &'input Self::TF; } // allows user to call parser with &mut reference to Lexer impl<'input, T> TokenSource<'input> for &mut T where T: TokenSource<'input> { - type Tok = T::Tok; + type TF = T::TF; #[inline(always)] - fn next_token(&mut self) -> Box { + fn next_token(&mut self) -> >::Tok { (**self).next_token() } @@ -68,7 +70,7 @@ impl<'input, T> TokenSource<'input> for &mut T where T: TokenSource<'input> { } #[inline(always)] - fn get_token_factory(&self) -> &'input dyn TokenFactory<'input, Tok=T::Tok> { + fn get_token_factory(&self) -> &'input Self::TF { (**self).get_token_factory() } } diff --git a/src/token_stream.rs b/src/token_stream.rs index 19aa766..8543b27 100644 --- a/src/token_stream.rs +++ b/src/token_stream.rs @@ -1,8 +1,9 @@ +use std::borrow::{Borrow, BorrowMut}; use std::cmp::min; use std::marker::{PhantomData, Unsize}; use std::ops::Deref; -use crate::common_token_factory::CommonTokenFactory; +use crate::common_token_factory::{CommonTokenFactory, TokenFactory}; use crate::errors::ANTLRError; use crate::int_stream::{IntStream, IterWrapper}; use crate::token::{OwningToken, Token, TOKEN_EOF, TOKEN_INVALID_TYPE}; @@ -15,10 +16,11 @@ use crate::token_source::TokenSource; /// `TokenSource`, not `TokenStream` pub trait TokenStream<'input>: IntStream { /// Output token type - type Tok: Token + ?Sized + Unsize + 'input; - fn lt(&mut self, k: isize) -> Option<&Self::Tok>; - fn get(&self, index: isize) -> &Self::Tok; - fn get_token_source(&self) -> &dyn TokenSource<'input, Tok=Self::Tok>; + type TF: TokenFactory<'input>; + fn lt(&mut self, k: isize) -> Option<&>::Inner>; + fn get(&self, index: isize) -> &>::Inner; + fn get_cloned(&self, index: isize) -> >::Tok; + fn get_token_source(&self) -> &dyn TokenSource<'input, TF=Self::TF>; // fn set_token_source(&self,source: Box); fn get_all_text(&self) -> String; fn get_text_from_interval(&self, start: isize, stop: isize) -> String; @@ -42,7 +44,7 @@ impl<'a, 'input: 'a, T: TokenStream<'input>> Iterator for TokenIter<'a, 'input, pub struct UnbufferedTokenStream<'input, T: TokenSource<'input>> { token_source: T, - pub(crate) tokens: Vec>, + pub(crate) tokens: Vec<>::Tok>, //todo prev token for lt(-1) pub(crate) current_token_index: isize, markers_count: isize, @@ -88,11 +90,11 @@ impl<'input, T: TokenSource<'input>> UnbufferedTokenStream<'input, T> { pub(crate) fn fill(&mut self, need: isize) -> isize { for i in 0..need { - if self.tokens.len() > 0 && self.tokens.last().unwrap().get_token_type() == TOKEN_EOF { + if self.tokens.len() > 0 && self.tokens.last().unwrap().borrow().get_token_type() == TOKEN_EOF { return i; } let mut token = self.token_source.next_token(); - token.set_token_index(self.get_buffer_start_index() + self.tokens.len() as isize); + token.borrow().set_token_index(self.get_buffer_start_index() + self.tokens.len() as isize); self.tokens.push(token); } @@ -101,23 +103,27 @@ impl<'input, T: TokenSource<'input>> UnbufferedTokenStream<'input, T> { } impl<'input, T: TokenSource<'input>> TokenStream<'input> for UnbufferedTokenStream<'input, T> { - type Tok = T::Tok; + type TF = T::TF; - fn lt(&mut self, i: isize) -> Option<&T::Tok> { + fn lt(&mut self, i: isize) -> Option<&>::Inner> { if i == -1 { - return self.tokens.get(self.p as usize - 1).map(Deref::deref) + return self.tokens.get(self.p as usize - 1).map(Borrow::borrow) } self.sync(i); - self.tokens.get((self.p + i - 1) as usize).map(Deref::deref) + self.tokens.get((self.p + i - 1) as usize).map(Borrow::borrow) } - fn get(&self, index: isize) -> &Self::Tok { - self.tokens[(index - self.get_buffer_start_index()) as usize].as_ref() + fn get(&self, index: isize) -> &>::Inner { + self.tokens[(index - self.get_buffer_start_index()) as usize].borrow() } - fn get_token_source(&self) -> &dyn TokenSource<'input, Tok=T::Tok> { + fn get_cloned(&self, index: isize) -> >::Tok { + self.tokens[(index - self.get_buffer_start_index()) as usize].clone() + } + + fn get_token_source(&self) -> &dyn TokenSource<'input, TF=Self::TF> { &self.token_source } @@ -140,7 +146,7 @@ impl<'input, T: TokenSource<'input>> TokenStream<'input> for UnbufferedTokenStre let mut buf = String::new(); for i in a..(b + 1) { - let t = &self.tokens[i as usize]; + let t = self.tokens[i as usize].borrow(); if t.get_token_type() == TOKEN_EOF { break } buf.push_str(t.get_text()); } @@ -187,15 +193,16 @@ impl<'input, T: TokenSource<'input>> IntStream for UnbufferedTokenStream<'input, if self.markers_count == 0 { if self.p > 0 { //todo rewrite properly as safe code, this is completely wrong - unsafe { - // might be UB if 2p > len? - // copy_nonoverlapping( - std::intrinsics::copy( - &self.tokens[self.p as usize] as *const Box, - &mut self.tokens[0] as *mut Box, - self.tokens.len() - self.p as usize, - ) - } + // unsafe { + // might be UB if 2p > len? + // copy_nonoverlapping( + // std::intrinsics::copy( + // &self.tokens[self.p as usize] as *const T::Tok, + // &mut self.tokens[0] as *mut T::Tok, + // self.tokens.len() - self.p as usize, + // ) + // } + unimplemented!() } } } @@ -231,7 +238,7 @@ impl<'input, T: TokenSource<'input>> IntStream for UnbufferedTokenStream<'input, // pub(crate) struct DynTokenStream<'input,T:TokenStream<'input>>(pub T); // // impl<'input,T:TokenStream<'input>> TokenStream<'input> for DynTokenStream<'input,T>{ -// type Tok = dyn Token + 'input; +// type TF = Box; // // fn lt(&mut self, k: isize) -> Option<&Self::Tok> { // match self.0.lt(k){