Skip to content

Commit

Permalink
feat: ensure tags have the same starting and ending names
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Feb 20, 2024
1 parent 2743ff8 commit c2306c6
Show file tree
Hide file tree
Showing 8 changed files with 4,065 additions and 3,505 deletions.
63 changes: 37 additions & 26 deletions common/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,33 @@ enum TokenType {
CDATA,
XML_MODEL,
XML_STYLESHEET,
START_TAG_NAME,
END_TAG_NAME,
ERRONEOUS_END_NAME,
SELF_CLOSING_TAG_DELIMITER,
IMPLICIT_END_TAG,
};

// BUG: see cursorless-dev/vscode-parse-tree#74

/// Check if the character is an ASCII letter (`A`-`Z` or `a`-`z`).
/// Function-like macro: `chr` is expanded several times, so pass an
/// expression without side effects.
#define isalpha(chr) \
(((chr) >= 'A' && (chr) <= 'Z') || \
((chr) >= 'a' && (chr) <= 'z'))
#define isalpha(chr) (((chr) >= 'A' && (chr) <= 'Z') || ((chr) >= 'a' && (chr) <= 'z'))

/// Check if the character is an ASCII letter or a decimal digit (`0`-`9`).
/// Built on `isalpha`, so `chr` is likewise expanded several times.
#define isalnum(chr) \
(isalpha(chr) || ((chr) >= '0' && (chr) <= '9'))
#define isalnum(chr) (isalpha(chr) || ((chr) >= '0' && (chr) <= '9'))

/// Advance the lexer if the lookahead matches the given character;
/// otherwise return false from the enclosing function.
/// NOTE: this expands to an `if` statement followed by a separate `advance`
/// call and is not wrapped in `do { ... } while (0)`, so it must only be used
/// as a statement of its own inside a braced block.
#define advance_if_not(lexer, chr) \
if ((lexer)->lookahead != (chr)) return false; advance((lexer))
#define advance_if_not(lexer, chr) \
if ((lexer)->lookahead != (chr)) \
return false; \
advance((lexer))

/// Move the lexer forward past the current lookahead
/// (calls `lexer->advance` with its second argument set to `false`)
static inline void advance(TSLexer *lexer)
{
    lexer->advance(lexer, false);
}

/// Move the lexer forward while skipping the current lookahead
/// (calls `lexer->advance` with its second argument set to `true`)
static inline void skip(TSLexer *lexer)
{
    lexer->advance(lexer, true);
}

/// Check if the character is valid in PITarget
/// @private
static inline bool is_valid_pi_char(int32_t chr) {
Expand Down Expand Up @@ -60,8 +67,7 @@ static bool scan_pi_target(TSLexer *lexer, const bool *valid_symbols) {

if (advanced_once) {
while (is_valid_pi_char(lexer->lookahead)) {
if (found_x_first &&
(lexer->lookahead == 'm' || lexer->lookahead == 'M')) {
if (found_x_first && (lexer->lookahead == 'm' || lexer->lookahead == 'M')) {
advance(lexer);
if (lexer->lookahead == 'l' || lexer->lookahead == 'L') {
advance(lexer);
Expand All @@ -70,8 +76,10 @@ static bool scan_pi_target(TSLexer *lexer, const bool *valid_symbols) {
bool last_char_hyphen = lexer->lookahead == '-';
advance(lexer);
if (last_char_hyphen) {
if (valid_symbols[XML_MODEL] && check_word(lexer, "model")) return false;
if (valid_symbols[XML_STYLESHEET] && check_word(lexer, "stylesheet")) return false;
if (valid_symbols[XML_MODEL] && check_word(lexer, "model"))
return false;
if (valid_symbols[XML_STYLESHEET] && check_word(lexer, "stylesheet"))
return false;
}
} else {
return false;
Expand All @@ -93,15 +101,18 @@ static bool scan_pi_target(TSLexer *lexer, const bool *valid_symbols) {

/// Scan for the content of a PI node
static bool scan_pi_content(TSLexer *lexer) {
while (!lexer->eof(lexer) && lexer->lookahead != '\n' && lexer->lookahead != '?') advance(lexer);
while (!lexer->eof(lexer) && lexer->lookahead != '\n' && lexer->lookahead != '?')
advance(lexer);

if (lexer->lookahead != '?') return false;
if (lexer->lookahead != '?')
return false;
lexer->mark_end(lexer);
advance(lexer);

if (lexer->lookahead == '>') {
advance(lexer);
while (lexer->lookahead == ' ') advance(lexer);
while (lexer->lookahead == ' ')
advance(lexer);
advance_if_not(lexer, '\n');
lexer->result_symbol = PI_CONTENT;
return true;
Expand All @@ -111,9 +122,11 @@ static bool scan_pi_content(TSLexer *lexer) {
}

/// Scan for a Comment node
static bool scan_comment(TSLexer *lexer) {
advance_if_not(lexer, '<');
advance_if_not(lexer, '!');
static bool scan_comment(TSLexer *lexer, bool xml) {
if (!xml) {
advance_if_not(lexer, '<');
advance_if_not(lexer, '!');
}
advance_if_not(lexer, '-');
advance_if_not(lexer, '-');

Expand All @@ -140,13 +153,11 @@ static bool scan_comment(TSLexer *lexer) {
}

/// Define the boilerplate functions of the scanner
///
/// Expands to stub definitions of the `tree_sitter_<name>_external_scanner_*`
/// entry points: `create` returns NULL, `serialize` returns 0, and `destroy`
/// and `deserialize` (plus `reset`, where it is defined) have empty bodies.
#define SCANNER_BOILERPLATE(name) \
void *tree_sitter_##name##_external_scanner_create() { return NULL; } \
\
void tree_sitter_##name##_external_scanner_destroy(void *payload) {} \
\
void tree_sitter_##name##_external_scanner_reset(void *payload) {} \
\
unsigned tree_sitter_##name##_external_scanner_serialize(void *payload, char *buffer) { return 0; } \
\
#define SCANNER_BOILERPLATE(name) \
void *tree_sitter_##name##_external_scanner_create() { return NULL; } \
\
void tree_sitter_##name##_external_scanner_destroy(void *payload) {} \
\
unsigned tree_sitter_##name##_external_scanner_serialize(void *payload, char *buffer) { return 0; } \
\
void tree_sitter_##name##_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {}
2 changes: 1 addition & 1 deletion dtd/src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer, const

if (valid_symbols[PI_CONTENT]) return scan_pi_content(lexer);

if (valid_symbols[COMMENT]) return scan_comment(lexer);
if (valid_symbols[COMMENT]) return scan_comment(lexer, false);

return false;
}
Expand Down
19 changes: 16 additions & 3 deletions xml/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ module.exports = grammar(DTD, {
$.CData,
'xml-model',
'xml-stylesheet',

$._start_tag_name,
$._end_tag_name,
$._erroneous_end_name,
'/>',
$._implicit_end_tag,
],

inline: $ => [
Expand Down Expand Up @@ -107,7 +113,7 @@ module.exports = grammar(DTD, {

EmptyElemTag: $ => seq(
'<',
$.Name,
alias($._start_tag_name, $.Name),
c.rseq($._S, $.Attribute),
O($._S),
'/>'
Expand All @@ -117,13 +123,20 @@ module.exports = grammar(DTD, {

STag: $ => seq(
'<',
$.Name,
alias($._start_tag_name, $.Name),
c.rseq($._S, $.Attribute),
O($._S),
'>'
),

ETag: $ => seq('</', $.Name, O($._S), '>'),
ETag: $ => seq('</', alias($._end_tag_name, $.Name), O($._S), '>'),

ErroneousETag: $ => seq(
'</',
alias($._erroneous_end_name, $.ErroneousName),
O($._S),
'>',
),

content: $ => repeat1(
choice(
Expand Down
82 changes: 75 additions & 7 deletions xml/src/grammar.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions xml/src/node-types.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c2306c6

Please sign in to comment.