Skip to content

Commit

Permalink
feat: improve performance via lazy Python object creation
Browse files Browse the repository at this point in the history
- make `comments` and `text` lazy getters
  • Loading branch information
benfdking committed Jan 17, 2025
1 parent 43eb0d9 commit 4efe27f
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 30 deletions.
64 changes: 40 additions & 24 deletions sqlglotrs/src/token.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
use crate::settings::TokenType;
use pyo3::prelude::PyListMethods;
use pyo3::types::{PyList, PyNone, PyString};
use pyo3::{pyclass, IntoPy, Py, PyObject, Python};
use pyo3::{pyclass, pymethods, IntoPy, PyObject, Python};

#[derive(Debug)]
#[pyclass]
pub struct Token {
#[pyo3(get, name = "token_type_index")]
pub token_type: TokenType,
#[pyo3(get, set, name = "token_type")]
pub token_type_py: PyObject,
#[pyo3(get)]
pub text: Py<PyString>,
pub token_type_index: TokenType,
pub token_type: Option<PyObject>,
#[pyo3(get)]
pub text: String,
#[pyo3(get)]
pub line: usize,
#[pyo3(get)]
Expand All @@ -20,8 +18,7 @@ pub struct Token {
pub start: usize,
#[pyo3(get)]
pub end: usize,
#[pyo3(get)]
pub comments: Py<PyList>,
pub comments: Vec<String>,
}

impl Token {
Expand All @@ -34,26 +31,45 @@ impl Token {
end: usize,
comments: Vec<String>,
) -> Token {
Python::with_gil(|py| Token {
token_type,
token_type_py: PyNone::get_bound(py).into_py(py),
text: PyString::new_bound(py, &text).into_py(py),
Token {
token_type_index: token_type,
token_type: None,
text,
line,
col,
start,
end,
comments: PyList::new_bound(py, &comments).into(),
})
comments,
}
}

pub fn append_comments(&mut self, new_comments: &mut Vec<String>) {
self.comments.append(new_comments);
}
}

#[pymethods]
impl Token {
#[getter(comments)]
fn comments(&self, py: Python) -> Vec<String> {
self.comments.clone()
}

#[getter(text)]
fn text(&self) -> &str {
&self.text
}

#[getter(token_type)]
fn token_type(&self, py: Python) -> PyObject {
match &self.token_type {
Some(token_type) => token_type.clone_ref(py),
None => PyNone::get_bound(py).into_py(py),
}
}

pub fn append_comments(&self, comments: &mut Vec<String>) {
Python::with_gil(|py| {
let pylist = self.comments.bind(py);
for comment in comments.drain(..) {
if let Err(_) = pylist.append(comment) {
panic!("Failed to append comments to the Python list");
}
}
});
#[setter(token_type)]
fn set_token_type(&mut self, token_type: PyObject) {
self.token_type = Some(token_type);
}
}
16 changes: 10 additions & 6 deletions sqlglotrs/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ impl<'a> TokenizerState<'a> {
|| self
.settings
.command_prefix_tokens
.contains(&self.tokens[self.tokens.len() - 2].token_type))
.contains(&self.tokens[self.tokens.len() - 2].token_type_index))
{
let start = self.current;
let tokens_len = self.tokens.len();
Expand Down Expand Up @@ -401,7 +401,7 @@ impl<'a> TokenizerState<'a> {
&& self
.settings
.tokens_preceding_hint
.contains(&self.tokens.last().unwrap().token_type)
.contains(&self.tokens.last().unwrap().token_type_index)
{
self.add(self.token_types.hint, None)?;
}
Expand Down Expand Up @@ -503,7 +503,9 @@ impl<'a> TokenizerState<'a> {
if self.peek_char.is_ascii_digit() {
self.advance(1)?;
} else if self.peek_char == '.' && !decimal {
if self.tokens.last().map(|t| t.token_type) == Some(self.token_types.parameter) {
if self.tokens.last().map(|t| t.token_type_index)
== Some(self.token_types.parameter)
{
return self.add(self.token_types.number, None);
}
decimal = true;
Expand Down Expand Up @@ -534,7 +536,8 @@ impl<'a> TokenizerState<'a> {
.numeric_literals
.get(&literal.to_uppercase())
.unwrap_or(&String::from("")),
).copied();
)
.copied();

let replaced = literal.replace("_", "");

Expand Down Expand Up @@ -598,12 +601,13 @@ impl<'a> TokenizerState<'a> {
}

let token_type =
if self.tokens.last().map(|t| t.token_type) == Some(self.token_types.parameter) {
if self.tokens.last().map(|t| t.token_type_index) == Some(self.token_types.parameter) {
self.token_types.var
} else {
self.settings
.keywords
.get(&self.text().to_uppercase()).copied()
.get(&self.text().to_uppercase())
.copied()
.unwrap_or(self.token_types.var)
};
self.add(token_type, None)
Expand Down

0 comments on commit 4efe27f

Please sign in to comment.