Start on Span refactor: replace Tokens with SoA
VonTum committed Jan 31, 2024
1 parent 3d7cc13 · commit 81269a9
Showing 5 changed files with 97 additions and 91 deletions.
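
A note before the hunks: this commit replaces the array-of-structs Vec<Token> (each Token packing its type and byte range, read via get_type() and get_range()) with a struct-of-arrays TokenizeResult. TokenizeResult itself is defined in src/tokenizer.rs, the fifth changed file, whose diff did not load on this page; the following is therefore only a sketch inferred from the call sites below, and everything except token_types, len(), and get_token_range() is an assumption:

    use std::ops::Range;

    // Sketch only: shape inferred from call sites in this diff, not the
    // actual src/tokenizer.rs definition (that file's diff is not shown).
    pub type TokenTypeIdx = u8; // assumed width

    pub struct TokenizeResult {
        // One entry per token; replaces the type field of the old Token struct.
        pub token_types : Vec<TokenTypeIdx>,
        // Byte range of each token in the file text (assumed representation;
        // the real struct may store flat boundary offsets instead).
        token_boundaries : Vec<Range<usize>>,
    }

    impl TokenizeResult {
        pub fn len(&self) -> usize {
            self.token_types.len()
        }
        pub fn get_token_range(&self, tok_idx : usize) -> Range<usize> {
            self.token_boundaries[tok_idx].clone()
        }
    }

The upside is visible throughout syntax_highlighting.rs and parser.rs below: passes that only need token types scan the dense token_types array directly, while byte ranges are fetched on demand with get_token_range(tok_idx).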
2 changes: 1 addition & 1 deletion src/dev_aid/lsp.rs
@@ -198,7 +198,7 @@ fn do_syntax_highlight(file_data : &FileData, linker : &Linker) -> (SemanticToke
let typ = get_semantic_token_type_from_ide_token(ide_tok);
let mod_bits = get_modifiers_for_token(ide_tok);

-let tok_range = file_data.tokens[tok_idx].get_range();
+let tok_range = file_data.tokens.get_token_range(tok_idx);
let whitespace_text = &file_text[cur_whitespace_start..tok_range.start];
cur_whitespace_start = tok_range.end;
let token_text = &file_text[tok_range];
19 changes: 9 additions & 10 deletions src/dev_aid/syntax_highlighting.rs
@@ -45,21 +45,21 @@ fn pretty_print_chunk_with_whitespace(whitespace_start : usize, file_text : &str
print!("{}{}", whitespace_text, st.apply_to(&file_text[text_span]));
}

-fn print_tokens(file_text : &str, tokens : &[Token]) {
+fn print_tokens(file_text : &str, tokens : &TokenizeResult) {
let mut whitespace_start : usize = 0;
-for (tok_idx, token) in tokens.iter().enumerate() {
+for tok_idx in 0..tokens.len() {
let styles = [Style::new().magenta(), Style::new().yellow(), Style::new().blue()];
let st = styles[tok_idx % styles.len()].clone().underlined();

-let token_range = token.get_range();
+let token_range = tokens.get_token_range(tok_idx);
pretty_print_chunk_with_whitespace(whitespace_start, file_text, token_range.clone(), st);
whitespace_start = token_range.end;
}

print!("{}\n", &file_text[whitespace_start..file_text.len()]);
}

-fn pretty_print(file_text : &str, tokens : &[Token], ide_infos : &[IDEToken]) {
+fn pretty_print(file_text : &str, tokens : &TokenizeResult, ide_infos : &[IDEToken]) {
let mut whitespace_start : usize = 0;

for (tok_idx, token) in ide_infos.iter().enumerate() {
@@ -85,7 +85,7 @@ fn pretty_print(file_text : &str, tokens : &[Token], ide_infos : &[IDEToken]) {
}
};

-let tok_span = tokens[tok_idx].get_range();
+let tok_span = tokens.get_token_range(tok_idx);
pretty_print_chunk_with_whitespace(whitespace_start, file_text, tok_span.clone(), st);
whitespace_start = tok_span.end;
}
@@ -162,8 +162,7 @@ pub fn create_token_ide_info<'a>(parsed: &FileData, linker : &Linker) -> Vec<IDE
let mut result : Vec<IDEToken> = Vec::new();
result.reserve(parsed.tokens.len());

-for t in &parsed.tokens {
-let tok_typ = t.get_type();
+for &tok_typ in &parsed.tokens.token_types {
let initial_typ = if is_keyword(tok_typ) {
IDETokenType::Keyword
} else if is_bracket(tok_typ) != IsBracket::NotABracket {
@@ -196,14 +195,14 @@ pub fn create_token_ide_info<'a>(parsed: &FileData, linker : &Linker) -> Vec<IDE
}

// Outputs character_offsets.len() == tokens.len() + 1 to include EOF token
-fn generate_character_offsets(file_text : &str, tokens : &[Token]) -> Vec<Range<usize>> {
+fn generate_character_offsets(file_text : &str, tokens : &TokenizeResult) -> Vec<Range<usize>> {
let mut character_offsets : Vec<Range<usize>> = Vec::new();
character_offsets.reserve(tokens.len());

let mut cur_char = 0;
let mut whitespace_start = 0;
-for tok in tokens {
-let tok_range = tok.get_range();
+for tok_idx in 0..tokens.len() {
+let tok_range = tokens.get_token_range(tok_idx);

// whitespace
cur_char += file_text[whitespace_start..tok_range.start].chars().count();
6 changes: 3 additions & 3 deletions src/linker.rs
@@ -1,6 +1,6 @@
use std::{collections::{HashMap, HashSet}, rc::Rc, cell::RefCell};

-use crate::{ast::{Module, LinkInfo, Span}, arena_alloc::{ArenaAllocator, UUID, UUIDMarker}, parser::{FullParseResult, TokenTreeNode}, tokenizer::Token, errors::{ErrorCollector, error_info}, flattening::FlattenedModule, util::{const_str_position, const_str_position_in_tuples}, instantiation::InstantiatedModule, value::Value, typing::Type};
+use crate::{arena_alloc::{ArenaAllocator, UUID, UUIDMarker}, ast::{Module, LinkInfo, Span}, errors::{ErrorCollector, error_info}, flattening::FlattenedModule, instantiation::InstantiatedModule, parser::{FullParseResult, TokenTreeNode}, tokenizer::TokenizeResult, typing::Type, util::{const_str_position, const_str_position_in_tuples}, value::Value};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ModuleUUIDMarker;
@@ -119,15 +119,15 @@ impl Linkable for NamedType {

pub struct FileData {
pub file_text : String,
-pub tokens : Vec<Token>,
+pub tokens : TokenizeResult,
pub token_hierarchy : Vec<TokenTreeNode>,
pub parsing_errors : ErrorCollector,
pub associated_values : Vec<NameElem>
}

impl FileData {
fn get_token_text(&self, token_idx : usize) -> &str {
-&self.file_text[self.tokens[token_idx].get_range()]
+&self.file_text[self.tokens.get_token_range(token_idx)]
}
}

81 changes: 40 additions & 41 deletions src/parser.rs
@@ -13,20 +13,20 @@ struct TokenContent {
}

pub enum TokenTreeNode {
-PlainToken(Token, usize), // Has the index of the given token to the global Token array
+PlainToken{tok_typ : TokenTypeIdx, range : Range<usize>, tok_idx : usize}, // Has the index of the given token to the global Token array
// Code between '{' and '}', '(' and ')', or '[' and ']' exclusive. Contains sublist of tokens, index of open, index of close bracket
Block(TokenTypeIdx, Vec<Self>, Span),
}
impl TokenTreeNode {
fn get_token_type(&self) -> TokenTypeIdx {
match self {
-Self::PlainToken(tok, _pos) => tok.get_type(),
+Self::PlainToken{tok_typ, range : _, tok_idx : _} => *tok_typ,
Self::Block(typ, _content, _span) => *typ
}
}
fn get_span(&self) -> Span {
match self {
-Self::PlainToken(_typ, pos) => Span::from(*pos),
+Self::PlainToken{tok_typ: _, range : _, tok_idx} => Span::from(*tok_idx),
Self::Block(_typ, _content, span) => *span
}
}
@@ -51,46 +51,45 @@ struct TokenHierarchyStackElem {
parent : Vec<TokenTreeNode>
}

-pub fn to_token_hierarchy(tokens : &[Token], errors : &ErrorCollector) -> Vec<TokenTreeNode> {
+pub fn to_token_hierarchy(tokens : &TokenizeResult, errors : &ErrorCollector) -> Vec<TokenTreeNode> {
let mut cur_token_slab : Vec<TokenTreeNode> = Vec::new();
let mut stack : Vec<TokenHierarchyStackElem> = Vec::new(); // Type of opening bracket, token position, Token Subtree

-for (idx, &tok) in tokens.iter().enumerate() {
-let tok_typ = tok.get_type();
+for (tok_idx, &tok_typ) in tokens.token_types.iter().enumerate() {
if tok_typ == TOKEN_COMMENT || tok_typ == TOKEN_INVALID { // At this stage the comments are filtered out
continue;
}
match is_bracket(tok_typ) {
IsBracket::Open => {
-stack.push(TokenHierarchyStackElem{open_bracket : tok_typ, open_bracket_pos : idx, parent : cur_token_slab});
+stack.push(TokenHierarchyStackElem{open_bracket : tok_typ, open_bracket_pos : tok_idx, parent : cur_token_slab});
cur_token_slab = Vec::new();
},
IsBracket::Close => {
loop { // Loop for bracket stack unrolling, for correct code only runs once
if let Some(cur_block) = stack.pop() {
if closes(cur_block.open_bracket, tok_typ) { // All is well. This bracket was closed properly. Happy path!
let mut parent_cur_token_slab = cur_block.parent;
-parent_cur_token_slab.push(TokenTreeNode::Block(cur_block.open_bracket, cur_token_slab, Span(cur_block.open_bracket_pos, idx)));
+parent_cur_token_slab.push(TokenTreeNode::Block(cur_block.open_bracket, cur_token_slab, Span(cur_block.open_bracket_pos, tok_idx)));
cur_token_slab = parent_cur_token_slab;
break;
} else {
if !stack.iter().any(|prev_bracket| closes(prev_bracket.open_bracket, tok_typ)) { // Any bracket in the stack closes this?
-error_unopened_bracket(idx, tok_typ, cur_block.open_bracket_pos, errors);
+error_unopened_bracket(tok_idx, tok_typ, cur_block.open_bracket_pos, errors);
stack.push(cur_block); // Push the previous bracket back onto bracket stack, as we disregarded erroneous closing bracket
break;
} else {
-error_unclosed_bracket(cur_block.open_bracket_pos, tokens[cur_block.open_bracket_pos].get_type(), idx, errors);
+error_unclosed_bracket(cur_block.open_bracket_pos, tokens.token_types[cur_block.open_bracket_pos], tok_idx, errors);
}
}
} else {
// Too many close brackets
-errors.error_basic(Span::from(idx), "A close bracket had no corresponding opening bracket.");
+errors.error_basic(Span::from(tok_idx), "A close bracket had no corresponding opening bracket.");
break;
}
}
},
IsBracket::NotABracket => {
-cur_token_slab.push(TokenTreeNode::PlainToken(tok, idx));
+cur_token_slab.push(TokenTreeNode::PlainToken{tok_typ, range : tokens.get_token_range(tok_idx), tok_idx});
}
}
}
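
Aside: the unrolling loop in the hunk above is the densest logic in this commit. Here is a self-contained miniature of the same recovery strategy, operating on plain characters and tracking only which errors get reported; the end-of-input flush at the bottom is an assumption, since that part of to_token_hierarchy falls outside the visible hunk:

    fn closes(open : char, close : char) -> bool {
        matches!((open, close), ('(', ')') | ('[', ']') | ('{', '}'))
    }

    // Miniature of the bracket recovery in to_token_hierarchy above.
    fn check_brackets(src : &str) -> Vec<String> {
        let mut errors = Vec::new();
        let mut stack : Vec<(char, usize)> = Vec::new(); // (open bracket, position)
        for (idx, c) in src.char_indices() {
            match c {
                '(' | '[' | '{' => stack.push((c, idx)),
                ')' | ']' | '}' => loop { // unrolling loop; for correct code only runs once
                    if let Some((open, open_pos)) = stack.pop() {
                        if closes(open, c) {
                            break; // happy path: bracket closed properly
                        } else if !stack.iter().any(|&(o, _)| closes(o, c)) {
                            // Nothing deeper matches: disregard this close bracket.
                            errors.push(format!("unopened '{c}' at {idx}"));
                            stack.push((open, open_pos)); // restore the open bracket
                            break;
                        } else {
                            // Keep unrolling, blaming the unclosed open bracket.
                            errors.push(format!("unclosed '{open}' at {open_pos}"));
                        }
                    } else {
                        errors.push(format!("'{c}' at {idx} has no opening bracket"));
                        break;
                    }
                },
                _ => {}
            }
        }
        // Assumed behavior: leftover open brackets are reported at end of input.
        for (open, open_pos) in stack {
            errors.push(format!("unclosed '{open}' at {open_pos}"));
        }
        errors
    }

On "( ] )" this reports one unopened-bracket error for the ']' and still pairs the parentheses; on "( [ )" it blames '[' as unclosed and closes the parentheses normally.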
@@ -170,18 +169,18 @@ impl<'it> TokenStream<'it> {
}
}
fn peek_is_plain(&mut self, expected : TokenTypeIdx) -> bool {
-if let Some(TokenTreeNode::PlainToken(tok, _place)) = self.iter.peek() {
-if tok.get_type() == expected {
+if let Some(TokenTreeNode::PlainToken{tok_typ, range : _, tok_idx : _}) = self.iter.peek() {
+if *tok_typ == expected {
return true;
}
}
false
}
fn eat_is_plain(&mut self, expected : TokenTypeIdx) -> Option<TokenContent> {
-if let Some(TokenTreeNode::PlainToken(tok, pos)) = self.peek() {
-if tok.get_type() == expected {
+if let Some(TokenTreeNode::PlainToken{tok_typ, range, tok_idx}) = self.peek() {
+if *tok_typ == expected {
self.next();
-return Some(TokenContent{position : *pos, text : tok.get_range()});
+return Some(TokenContent{position : *tok_idx, text : range.clone()});
}
}
None
@@ -240,8 +239,8 @@ impl<'file> ASTParserContext<'file> {
None => {
self.errors.error_basic(Span::from(unexpected_eof_idx), format!("Unexpected End of Scope while parsing {context}. Expected {expected_list_str}"))
}
-Some(TokenTreeNode::PlainToken(tok, pos)) => {
-self.error_unexpected_token_str(expected_list_str, tok.get_type(), *pos, context);
+Some(TokenTreeNode::PlainToken{tok_typ, range: _, tok_idx}) => {
+self.error_unexpected_token_str(expected_list_str, *tok_typ, *tok_idx, context);
}
Some(TokenTreeNode::Block(typ, _, span)) => {
let tok_typ_name = get_token_type_name(*typ);
@@ -255,8 +254,8 @@ impl<'file> ASTParserContext<'file> {
assert!(is_bracket(expected) == IsBracket::NotABracket);

match token_stream.next() {
-Some(TokenTreeNode::PlainToken(tok, idx)) if tok.get_type() == expected => {
-Some(TokenContent{position : *idx, text : tok.get_range()})
+Some(TokenTreeNode::PlainToken{tok_typ, range, tok_idx}) if *tok_typ == expected => {
+Some(TokenContent{position : *tok_idx, text : range.clone()})
},
other => {
self.error_unexpected_tree_node(&[expected], other, token_stream.unexpected_eof_token, context);
@@ -313,23 +312,23 @@ impl<'file> ASTParserContext<'file> {
// For expression
fn parse_unit_expression(&mut self, token_stream : &mut TokenStream, scope : &LocalVariableContext) -> Option<SpanExpression> {
let mut base_expr : (Expression, Span) = match token_stream.next() {
-Some(TokenTreeNode::PlainToken(tok, pos)) if is_unary_operator(tok.get_type()) => {
+Some(TokenTreeNode::PlainToken{tok_typ, range: _, tok_idx}) if is_unary_operator(*tok_typ) => {
let found_expr = self.parse_unit_expression(token_stream, scope)?;
-let new_span = Span(*pos, found_expr.1.1);
-return Some((Expression::UnaryOp(Box::new((Operator{op_typ : tok.get_type()}, *pos, found_expr))), new_span));
+let new_span = Span(*tok_idx, found_expr.1.1);
+return Some((Expression::UnaryOp(Box::new((Operator{op_typ : *tok_typ}, *tok_idx, found_expr))), new_span));
},
-Some(TokenTreeNode::PlainToken(tok, pos)) if tok.get_type() == TOKEN_IDENTIFIER => {
-let ident_ref = if let Some(local_idx) = scope.get_declaration_for(&self.file_text[tok.get_range()]) {
+Some(TokenTreeNode::PlainToken{tok_typ, range, tok_idx}) if *tok_typ == TOKEN_IDENTIFIER => {
+let ident_ref = if let Some(local_idx) = scope.get_declaration_for(&self.file_text[range.clone()]) {
LocalOrGlobal::Local(local_idx)
} else {
// todo namespacing and shit
-LocalOrGlobal::Global(Span::from(*pos))
+LocalOrGlobal::Global(Span::from(*tok_idx))
};
-(Expression::Named(ident_ref), Span::from(*pos))
+(Expression::Named(ident_ref), Span::from(*tok_idx))
},
-Some(TokenTreeNode::PlainToken(tok, pos)) if tok.get_type() == TOKEN_NUMBER => {
-let value = &self.file_text[tok.get_range()];
-(Expression::Constant(Value::Integer(BigInt::from_str(value).unwrap())), Span::from(*pos))
+Some(TokenTreeNode::PlainToken{tok_typ, range, tok_idx}) if *tok_typ == TOKEN_NUMBER => {
+let value = &self.file_text[range.clone()];
+(Expression::Constant(Value::Integer(BigInt::from_str(value).unwrap())), Span::from(*tok_idx))
},
Some(TokenTreeNode::Block(typ, contents, span)) if *typ == kw("(") => {
let mut content_token_stream = TokenStream::new(contents, span.0, span.1);
@@ -390,10 +389,10 @@ impl<'file> ASTParserContext<'file> {
loop {
let mut grabbed_symbol = self.parse_unit_expression(token_stream, scope)?;
match token_stream.peek() {
-Some(TokenTreeNode::PlainToken(tok, op_pos)) if is_operator(tok.get_type()) => {
+Some(TokenTreeNode::PlainToken{tok_typ, range: _, tok_idx}) if is_operator(*tok_typ) => {
//let operator_prescedence = get_binary_operator_prescedence(*typ);
while let Some((left_expr, stack_op, stack_op_pos)) = stack.pop() {
-if get_binary_operator_prescedence(stack_op) >= get_binary_operator_prescedence(tok.get_type()) {
+if get_binary_operator_prescedence(stack_op) >= get_binary_operator_prescedence(*tok_typ) {
grabbed_symbol = Expression::new_binop(left_expr, Operator{op_typ : stack_op}, stack_op_pos, grabbed_symbol);
} else {
stack.push((left_expr, stack_op, stack_op_pos)); // oops, shouldn't have popped it
@@ -402,7 +401,7 @@ impl<'file> ASTParserContext<'file> {
}

token_stream.next(); // commit operator peek
-stack.push((grabbed_symbol, tok.get_type(), *op_pos));
+stack.push((grabbed_symbol, *tok_typ, *tok_idx));
},
_other => {
while let Some((left_expr, stack_op, stack_op_pos)) = stack.pop() {
@@ -552,15 +551,15 @@ impl<'file> ASTParserContext<'file> {
}
}
match token_stream.next() {
Some(TokenTreeNode::PlainToken(tok, _pos)) if tok.get_type() == kw(",") => {
Some(TokenTreeNode::PlainToken{tok_typ, range:_, tok_idx:_}) if *tok_typ == kw(",") => {
continue; // parse next declaration
}
-Some(TokenTreeNode::PlainToken(tok, assign_pos)) if tok.get_type() == kw("=") => {
+Some(TokenTreeNode::PlainToken{tok_typ, range:_, tok_idx}) if *tok_typ == kw("=") => {
// Ends the loop
// T a, T b = x(y);
-return self.parse_statement_handle_assignment(left_expressions, *assign_pos, token_stream, scope, &mut code_block.statements, start_at);
+return self.parse_statement_handle_assignment(left_expressions, *tok_idx, token_stream, scope, &mut code_block.statements, start_at);
}
Some(TokenTreeNode::PlainToken(tok, _pos)) if tok.get_type() == kw(";") => {
Some(TokenTreeNode::PlainToken{tok_typ, range:_, tok_idx:_}) if *tok_typ == kw(";") => {
// Ends the loop
return self.parse_statement_handle_end(left_expressions, all_decls, &mut code_block.statements);
}
@@ -756,8 +755,8 @@ impl<'file> ASTParserContext<'file> {

while let Some(t) = outer_token_iter.next() {
match t {
-TokenTreeNode::PlainToken(tok, module_kw_pos) if tok.get_type() == kw("module") => {
-if let Some(module) = self.parse_module(outer_token_iter, *module_kw_pos) {
+TokenTreeNode::PlainToken{tok_typ, range:_, tok_idx} if *tok_typ == kw("module") => {
+if let Some(module) = self.parse_module(outer_token_iter, *tok_idx) {
modules.push(module);
}
},
@@ -783,7 +782,7 @@ pub fn parse<'nums, 'g, 'file>(token_hierarchy : &Vec<TokenTreeNode>, file_text

pub struct FullParseResult {
pub file_text : String,
-pub tokens : Vec<Token>,
+pub tokens : TokenizeResult,
pub token_hierarchy : Vec<TokenTreeNode>,
pub ast : ASTRoot
}
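
One more inference for readers following the commit title's "Span refactor": this diff never shows Span's definition (it is imported from src/ast.rs), but the call sites above, such as Span(cur_block.open_bracket_pos, tok_idx), Span::from(tok_idx), and found_expr.1.1, imply a pair of token indices. A hedged sketch of that implied shape:

    // Assumed, not taken from src/ast.rs: a Span is a pair of token indices.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct Span(pub usize, pub usize); // (first token index, last token index)

    impl From<usize> for Span {
        fn from(tok_idx : usize) -> Span {
            Span(tok_idx, tok_idx) // a single-token span
        }
    }

This fits the SoA change: Span values stay cheap index pairs, while the byte ranges they ultimately refer to are looked up through TokenizeResult in one place.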
(The diff for the fifth changed file did not load; the TokenizeResult import above suggests it is src/tokenizer.rs.)
