Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate tree-sitter for parsing. #1

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,14 @@ out

# Mac OS
.DS_Store

# Tree Sitter
src/tree_sitter/**
src/grammar.json
src/node-types.json
src/parser.c
parser.dylib

# Project
asts/**
zqlout/**
10 changes: 10 additions & 0 deletions examples/create_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CREATE TABLE employees (
id INTEGER PRIMARY KEY,
first_name VARCHAR(50) NOT NULL,
last_name VARCHAR(50) NOT NULL,
email VARCHAR(100) UNIQUE,
salary DECIMAL DEFAULT 50000,
hire_date DATE NOT NULL,
department_id INT REFERENCES departments(id),
is_active BOOLEAN DEFAULT TRUE
);
2 changes: 2 additions & 0 deletions examples/delete.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
DELETE FROM employees
WHERE termination_date < '2023-01-01';
18 changes: 18 additions & 0 deletions examples/function_call.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
SELECT
column_a AS a,
12,
func(a, b),
otherFunc(
a,
1,
b,
2
) AS yeah
FROM table_a ace
WHERE (
a = 1
) OR (
b = 2
)
LIMIT 5
;
4 changes: 4 additions & 0 deletions examples/inner_join_left_outer_join.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
SELECT e.first_name, e.last_name, d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.id
LEFT OUTER JOIN locations l ON d.location_id = l.id;
4 changes: 4 additions & 0 deletions examples/insert_multiple_values.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
INSERT INTO employees (first_name, last_name, salary)
VALUES
('Jane', 'Smith', 80000),
('Bob', 'Johnson', 65000);
3 changes: 3 additions & 0 deletions examples/insert_select.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
INSERT INTO employee_archive
SELECT * FROM employees
WHERE termination_date IS NOT NULL;
2 changes: 2 additions & 0 deletions examples/insert_single_value.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
INSERT INTO employees (first_name, last_name, salary)
VALUES ('John', 'Doe', 75000);
7 changes: 7 additions & 0 deletions examples/query_clauses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
SELECT first_name, last_name, salary AS annual_pay
FROM employees
WHERE department_id = 100
GROUP BY department_id
HAVING AVG(salary) > 50000
ORDER BY last_name DESC
LIMIT 10;
14 changes: 14 additions & 0 deletions examples/select_aggregate_clauses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
SELECT
d.department_name,
COUNT(*) AS employee_count,
AVG(e.salary) AS avg_salary,
MAX(e.hire_date) AS latest_hire
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id
WHERE e.salary > 50000
AND e.hire_date >= '2022-01-01'
AND (e.first_name LIKE 'J%' OR e.last_name LIKE 'S%')
GROUP BY d.department_name
HAVING COUNT(*) > 5
ORDER BY avg_salary DESC
LIMIT 5;
3 changes: 3 additions & 0 deletions examples/select_from.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SELECT a
FROM table_a
;
1 change: 1 addition & 0 deletions examples/select_one.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 1;
1 change: 1 addition & 0 deletions examples/select_star.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT * FROM employees;
4 changes: 4 additions & 0 deletions examples/update.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
UPDATE employees
SET salary = salary * 1.1,
last_modified = CURRENT_TIMESTAMP
WHERE department_id = 100;
254 changes: 254 additions & 0 deletions grammars/basesql/grammar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
/**
* This is a tree-sitter grammar for ANSI SQL.
* @file Common functionality shared across SQL dialects.
* @author Vinesh Kannan and Tamjid Rahman
* @license MIT
*/

/// <reference types="tree-sitter-cli/dsl" />

/**
 * Precedence levels used by the expression rules; a larger number binds
 * tighter. Every level is above 0 so operators keep outranking the
 * unnumbered `prec.left` on `join_table`.
 *
 * `not` sits between `and` and `comparison` so that `NOT a = 1` groups as
 * `NOT (a = 1)` rather than `(NOT a) = 1`.
 */
const PREC = {
  or: 1,
  and: 2,
  not: 3,
  comparison: 4,
  additive: 5,
  multiplicative: 6,
  unary_minus: 7,
};

export default grammar({
  name: "basesql",

  rules: {
    // Entry point: zero or more statements.
    source_file: ($) => repeat($.statement),

    // Statement terminator.
    terminal: () => token(";"),

    // One of the supported statement kinds; the trailing ";" is optional.
    statement: ($) =>
      seq(
        choice(
          $.select_statement,
          $.insert_statement,
          $.update_statement,
          $.delete_statement,
          $.create_table_statement
        ),
        optional($.terminal)
      ),

    select_token: () => token("SELECT"),

    // SELECT statement and its components. Every clause after the select
    // list is optional, but the clauses must appear in this order.
    select_statement: ($) =>
      seq(
        $.select_token,
        $.select_elements,
        optional($.from_clause),
        optional($.where_clause),
        optional($.group_by_clause),
        optional($.having_clause),
        optional($.order_by_clause),
        optional($.limit_clause)
      ),

    // Either a bare "*" or a comma-separated list of select elements.
    select_elements: ($) => choice("*", commaSep1($.select_element)),

    select_element: ($) => seq($.expression, optional($.alias_suffix)),

    alias_suffix: ($) =>
      choice(
        // WITH 'AS' keyword
        seq($.alias_token, $.identifier),
        // WITHOUT 'AS' keyword
        $.identifier
      ),

    alias_token: () => token("AS"),

    from_token: () => token("FROM"),

    from_clause: ($) => seq($.from_token, commaSep1($.table_reference)),

    // Recursive through join_clause, so a chain of joins nests to the left.
    table_reference: ($) => choice($.table_alias, $.join_clause),

    table_alias: ($) => seq($.table_name, optional($.alias_suffix)),

    join_clause: ($) => prec.left(2, seq($.table_reference, $.join_list)),

    // A join keyword sequence, the joined table, and its ON condition.
    join_table: ($) =>
      prec.left(
        seq(
          choice(
            "JOIN",
            seq("LEFT", optional("OUTER"), "JOIN"),
            seq("RIGHT", optional("OUTER"), "JOIN"),
            seq("INNER", "JOIN"),
            seq("FULL", optional("OUTER"), "JOIN")
          ),
          $.table_reference,
          "ON",
          $.expression
        )
      ),

    // NOTE(review): the optional comma-separated second join_table looks
    // unusual for SQL joins — confirm this is intended.
    join_list: ($) =>
      prec.left(1, seq($.join_table, optional(seq(",", $.join_table)))),

    where_clause: ($) => seq("WHERE", $.expression),

    group_by_clause: ($) => seq("GROUP", "BY", commaSep1($.expression)),

    having_clause: ($) => seq("HAVING", $.expression),

    order_by_clause: ($) => seq("ORDER", "BY", commaSep1($.order_by_element)),

    order_by_element: ($) => seq($.expression, optional(choice("ASC", "DESC"))),

    limit_clause: ($) => seq("LIMIT", $.number),

    // INSERT statement. The optional column list appears exactly once and is
    // shared by the VALUES and SELECT forms; repeating it inside the SELECT
    // branch would make `INSERT INTO t (a) SELECT ...` derivable two ways.
    insert_statement: ($) =>
      seq(
        "INSERT",
        "INTO",
        $.table_name,
        optional(seq("(", commaSep1($.identifier), ")")),
        choice(
          // VALUES syntax: one or more parenthesized rows
          seq("VALUES", commaSep1(seq("(", commaSep1($.expression), ")"))),
          // SELECT syntax
          $.select_statement
        )
      ),

    // UPDATE statement
    update_statement: ($) =>
      seq(
        "UPDATE",
        $.table_name,
        "SET",
        commaSep1($.update_assignment),
        optional($.where_clause)
      ),

    update_assignment: ($) => seq($.identifier, "=", $.expression),

    // DELETE statement
    delete_statement: ($) =>
      seq("DELETE", "FROM", $.table_name, optional($.where_clause)),

    // CREATE TABLE statement
    create_table_statement: ($) =>
      seq(
        "CREATE",
        "TABLE",
        $.table_name,
        "(",
        commaSep1($.column_definition),
        ")"
      ),

    column_definition: ($) =>
      seq($.identifier, $.data_type, repeat($.column_constraint)),

    // Type names; only VARCHAR and CHAR accept a parenthesized length.
    data_type: ($) =>
      choice(
        "INT",
        "INTEGER",
        "BIGINT",
        "SMALLINT",
        "VARCHAR",
        "TEXT",
        "CHAR",
        "DATE",
        "TIMESTAMP",
        "BOOLEAN",
        "FLOAT",
        "DOUBLE",
        "DECIMAL",
        seq("VARCHAR", "(", $.number, ")"),
        seq("CHAR", "(", $.number, ")")
      ),

    // Multi-word constraints are sequences of single-word tokens so that any
    // whitespace between the words is accepted; a single string such as
    // "NOT NULL" would only match the words separated by exactly one space.
    column_constraint: ($) =>
      choice(
        seq("PRIMARY", "KEY"),
        seq("NOT", "NULL"),
        "UNIQUE",
        seq("DEFAULT", $.expression),
        seq("REFERENCES", $.table_name, optional(seq("(", $.identifier, ")")))
      ),

    expression: ($) =>
      prec(
        0,
        choice(
          $.binary_expression,
          $.unary_expression,
          $.parenthesized_expression,
          $.function_call,
          $.column_reference,
          $.literal
        )
      ),

    // Left-associative infix operators plus the postfix IS [NOT] NULL tests.
    binary_expression: ($) =>
      choice(
        ...[
          ["OR", PREC.or],
          ["AND", PREC.and],
          ["=", PREC.comparison],
          ["!=", PREC.comparison],
          ["<>", PREC.comparison],
          ["<", PREC.comparison],
          ["<=", PREC.comparison],
          [">", PREC.comparison],
          [">=", PREC.comparison],
          ["LIKE", PREC.comparison],
          ["+", PREC.additive],
          ["-", PREC.additive],
          ["*", PREC.multiplicative],
          ["/", PREC.multiplicative],
        ].map(([operator, precedence]) =>
          prec.left(precedence, seq($.expression, operator, $.expression))
        ),
        prec.left(PREC.comparison, seq($.expression, "IS", "NULL")),
        prec.left(PREC.comparison, seq($.expression, "IS", "NOT", "NULL"))
      ),

    // Logical NOT binds looser than comparisons; arithmetic negation binds
    // tighter than every binary operator.
    unary_expression: ($) =>
      choice(
        prec(PREC.not, seq("NOT", $.expression)),
        prec(PREC.unary_minus, seq("-", $.expression))
      ),

    parenthesized_expression: ($) => seq("(", $.expression, ")"),

    // name(*), name(), or name(arg, ...).
    function_call: ($) =>
      seq(
        $.identifier,
        "(",
        choice("*", optional(commaSep1($.expression))),
        ")"
      ),

    // Basic elements
    // Both references accept an optional single "qualifier." prefix.
    column_reference: ($) =>
      seq(optional(seq($.identifier, ".")), $.identifier),

    table_name: ($) => seq(optional(seq($.identifier, ".")), $.identifier),

    literal: ($) => choice($.string, $.number, $.boolean, $.null),

    // Single- or double-quoted text; the patterns define no escape sequence
    // for the delimiter.
    string: () => choice(seq("'", /[^']*/, "'"), seq('"', /[^"]*/, '"')),

    // Unsigned digits with an optional fractional part.
    number: () => /\d+(\.\d+)?/,

    boolean: () => choice("TRUE", "FALSE"),

    null: () => "NULL",

    identifier: () => token(/[a-zA-Z_][a-zA-Z0-9_]*/),
  },
});

/**
 * Helper for a common pattern: one or more occurrences of `rule`,
 * separated by commas.
 *
 * @param rule - The grammar rule describing a single list element.
 * @returns `rule` followed by zero or more `"," rule` repetitions.
 */
function commaSep1(rule) {
  const additionalItems = repeat(seq(",", rule));
  return seq(rule, additionalItems);
}
Loading