tree-sitter-grammars
diff --git a/tree-sitter-markdown-inline/src/scanner.c b/tree-sitter-markdown-inline/src/scanner.c
Lines changed: 123 additions & 116 deletions
@@ -21,11 +21,8 @@ typedef enum {
 
 // Determines if a character is punctuation as defined by the markdown spec.
 static bool is_punctuation(char chr) {
-    return
-        (chr >= '!' && chr <= '/') ||
-        (chr >= ':' && chr <= '@') ||
-        (chr >= '[' && chr <= '`') ||
-        (chr >= '{' && chr <= '~');
+    return (chr >= '!' && chr <= '/') || (chr >= ':' && chr <= '@') ||
+           (chr >= '[' && chr <= '`') || (chr >= '{' && chr <= '~');
 }
 
 // State bitflags used with `Scanner.state`
@@ -35,27 +32,30 @@ static const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
 // Current delimiter run is opening
 static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;
 
-// Convenience function to emit the error token. This is done to stop invalid parse branches.
-// Specifically:
-// 1. When encountering a newline after a line break that ended a paragraph, and no new block
+// Convenience function to emit the error token. This is done to stop invalid
+// parse branches. Specifically:
+// 1. When encountering a newline after a line break that ended a paragraph, and
+// no new block
 //    has been opened.
 // 2. When encountering a new block after a soft line break.
-// 3. When a `$._trigger_error` token is valid, which is used to stop parse branches through
+// 3. When a `$._trigger_error` token is valid, which is used to stop parse
+// branches through
 //    normal tree-sitter grammar rules.
 //
-// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in grammar.js
+// See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
+// grammar.js
 static bool error(TSLexer *lexer) {
     lexer->result_symbol = ERROR;
     return true;
 }
 
 typedef struct {
-
     // Parser state flags
     uint8_t state;
     uint8_t code_span_delimiter_length;
     uint8_t latex_span_delimiter_length;
-    // The number of characters remaining in the currrent emphasis delimiter run.
+    // The number of characters remaining in the current emphasis delimiter
+    // run.
     uint8_t num_emphasis_delimiters_left;
 
 } Scanner;
@@ -86,8 +86,11 @@ static void deserialize(Scanner *s, const char *buffer, unsigned length) {
     }
 }
 
-static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, const bool *valid_symbols,
-                                 const char delimiter, const TokenType open_token, const TokenType close_token) {
+static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t *delimiter_length,
+                                 const bool *valid_symbols,
+                                 const char delimiter,
+                                 const TokenType open_token,
+                                 const TokenType close_token) {
     uint8_t level = 0;
     while (lexer->lookahead == delimiter) {
         lexer->advance(lexer, false);
@@ -127,24 +130,30 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
     return false;
 }
 
-static bool parse_backtick(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
-    return parse_leaf_delimiter(lexer, &s->code_span_delimiter_length, valid_symbols, '`',
-                                CODE_SPAN_START, CODE_SPAN_CLOSE);
+static bool parse_backtick(Scanner *s, TSLexer *lexer,
+                           const bool *valid_symbols) {
+    return parse_leaf_delimiter(lexer, &s->code_span_delimiter_length,
+                                valid_symbols, '`', CODE_SPAN_START,
+                                CODE_SPAN_CLOSE);
 }
 
-static bool parse_dollar(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
-    return parse_leaf_delimiter(lexer, &s->latex_span_delimiter_length, valid_symbols, '$',
-                                LATEX_SPAN_START, LATEX_SPAN_CLOSE);
+static bool parse_dollar(Scanner *s, TSLexer *lexer,
+                         const bool *valid_symbols) {
+    return parse_leaf_delimiter(lexer, &s->latex_span_delimiter_length,
+                                valid_symbols, '$', LATEX_SPAN_START,
+                                LATEX_SPAN_CLOSE);
 }
 
 static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
     lexer->advance(lexer, false);
-    // If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
-    // part of an emphasis delimiter run, so interpret it as such.
+    // If `num_emphasis_delimiters_left` is not zero then we already decided
+    // that this should be part of an emphasis delimiter run, so interpret it as
+    // such.
     if (s->num_emphasis_delimiters_left > 0) {
-        // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it should be open
-        // or close.
-        if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && valid_symbols[EMPHASIS_OPEN_STAR]) {
+        // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
+        // should be open or close.
+        if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
+            valid_symbols[EMPHASIS_OPEN_STAR]) {
             s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
             lexer->result_symbol = EMPHASIS_OPEN_STAR;
             s->num_emphasis_delimiters_left--;
@@ -163,37 +172,32 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
         star_count++;
         lexer->advance(lexer, false);
     }
-    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer);
-    if (valid_symbols[EMPHASIS_OPEN_STAR] || valid_symbols[EMPHASIS_CLOSE_STAR]) {
-        // The desicion made for the first star also counts for all the following stars in the
-        // delimiter run. Rembemer how many there are.
+    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
+                    lexer->eof(lexer);
+    if (valid_symbols[EMPHASIS_OPEN_STAR] ||
+        valid_symbols[EMPHASIS_CLOSE_STAR]) {
+        // The decision made for the first star also counts for all the
+        // following stars in the delimiter run. Remember how many there are.
         s->num_emphasis_delimiters_left = star_count - 1;
-        // Look ahead to the next symbol (after the last star) to find out if it is whitespace
-        // punctuation or other.
-        bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
+        // Look ahead to the next symbol (after the last star) to find out if it
+        // is whitespace, punctuation or other.
+        bool next_symbol_whitespace =
+            line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
         bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
-        // Information about the last token is in valid_symbols. See grammar.js for these
-        // tokens for how this is done.
-        if (
-            valid_symbols[EMPHASIS_CLOSE_STAR] &&
-            !valid_symbols[LAST_TOKEN_WHITESPACE] && (
-                !valid_symbols[LAST_TOKEN_PUNCTUATION] ||
-                next_symbol_punctuation ||
-                next_symbol_whitespace
-            )
-        ) {
+        // Information about the last token is in valid_symbols. See grammar.js
+        // for these tokens for how this is done.
+        if (valid_symbols[EMPHASIS_CLOSE_STAR] &&
+            !valid_symbols[LAST_TOKEN_WHITESPACE] &&
+            (!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
+             next_symbol_punctuation || next_symbol_whitespace)) {
             // Closing delimiters take precedence
             s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
             lexer->result_symbol = EMPHASIS_CLOSE_STAR;
             return true;
         }
-        if (
-            !next_symbol_whitespace && (
-                !next_symbol_punctuation ||
-                valid_symbols[LAST_TOKEN_PUNCTUATION] ||
-                valid_symbols[LAST_TOKEN_WHITESPACE]
-            )
-        ) {
+        if (!next_symbol_whitespace && (!next_symbol_punctuation ||
+                                        valid_symbols[LAST_TOKEN_PUNCTUATION] ||
+                                        valid_symbols[LAST_TOKEN_WHITESPACE])) {
             s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
             lexer->result_symbol = EMPHASIS_OPEN_STAR;
             return true;
@@ -204,12 +208,14 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
 
 static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
     lexer->advance(lexer, false);
-    // If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
-    // part of an emphasis delimiter run, so interpret it as such.
+    // If `num_emphasis_delimiters_left` is not zero then we already decided
+    // that this should be part of an emphasis delimiter run, so interpret it as
+    // such.
     if (s->num_emphasis_delimiters_left > 0) {
-        // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it should be open
-        // or close.
-        if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && valid_symbols[STRIKETHROUGH_OPEN]) {
+        // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
+        // should be open or close.
+        if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
+            valid_symbols[STRIKETHROUGH_OPEN]) {
             s->state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN);
             lexer->result_symbol = STRIKETHROUGH_OPEN;
             s->num_emphasis_delimiters_left--;
@@ -228,37 +234,32 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
         star_count++;
         lexer->advance(lexer, false);
     }
-    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer);
-    if (valid_symbols[STRIKETHROUGH_OPEN] || valid_symbols[STRIKETHROUGH_CLOSE]) {
-        // The desicion made for the first star also counts for all the following stars in the
-        // delimiter run. Rembemer how many there are.
+    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
+                    lexer->eof(lexer);
+    if (valid_symbols[STRIKETHROUGH_OPEN] ||
+        valid_symbols[STRIKETHROUGH_CLOSE]) {
+        // The decision made for the first star also counts for all the
+        // following stars in the delimiter run. Remember how many there are.
         s->num_emphasis_delimiters_left = star_count - 1;
-        // Look ahead to the next symbol (after the last star) to find out if it is whitespace
-        // punctuation or other.
-        bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
+        // Look ahead to the next symbol (after the last star) to find out if it
+        // is whitespace, punctuation or other.
+        bool next_symbol_whitespace =
+            line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
         bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
-        // Information about the last token is in valid_symbols. See grammar.js for these
-        // tokens for how this is done.
-        if (
-            valid_symbols[STRIKETHROUGH_CLOSE] &&
-            !valid_symbols[LAST_TOKEN_WHITESPACE] && (
-                !valid_symbols[LAST_TOKEN_PUNCTUATION] ||
-                next_symbol_punctuation ||
-                next_symbol_whitespace
-            )
-        ) {
+        // Information about the last token is in valid_symbols. See grammar.js
+        // for these tokens for how this is done.
+        if (valid_symbols[STRIKETHROUGH_CLOSE] &&
+            !valid_symbols[LAST_TOKEN_WHITESPACE] &&
+            (!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
+             next_symbol_punctuation || next_symbol_whitespace)) {
             // Closing delimiters take precedence
             s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
             lexer->result_symbol = STRIKETHROUGH_CLOSE;
             return true;
         }
-        if (
-            !next_symbol_whitespace && (
-                !next_symbol_punctuation ||
-                valid_symbols[LAST_TOKEN_PUNCTUATION] ||
-                valid_symbols[LAST_TOKEN_WHITESPACE]
-            )
-        ) {
+        if (!next_symbol_whitespace && (!next_symbol_punctuation ||
+                                        valid_symbols[LAST_TOKEN_PUNCTUATION] ||
+                                        valid_symbols[LAST_TOKEN_WHITESPACE])) {
             s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
             lexer->result_symbol = STRIKETHROUGH_OPEN;
             return true;
@@ -267,14 +268,17 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
     return false;
 }
 
-static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
+static bool parse_underscore(Scanner *s, TSLexer *lexer,
+                             const bool *valid_symbols) {
     lexer->advance(lexer, false);
-    // If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
-    // part of an emphasis delimiter run, so interpret it as such.
+    // If `num_emphasis_delimiters_left` is not zero then we already decided
+    // that this should be part of an emphasis delimiter run, so interpret it as
+    // such.
     if (s->num_emphasis_delimiters_left > 0) {
-        // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us wether it should be open
-        // or close.
-        if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) && valid_symbols[EMPHASIS_OPEN_UNDERSCORE]) {
+        // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
+        // should be open or close.
+        if ((s->state & STATE_EMPHASIS_DELIMITER_IS_OPEN) &&
+            valid_symbols[EMPHASIS_OPEN_UNDERSCORE]) {
             lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
             s->num_emphasis_delimiters_left--;
             return true;
@@ -292,21 +296,30 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
         underscore_count++;
         lexer->advance(lexer, false);
     }
-    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer);
-    if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] || valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
+    bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
+                    lexer->eof(lexer);
+    if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] ||
+        valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
         s->num_emphasis_delimiters_left = underscore_count - 1;
-        bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
+        bool next_symbol_whitespace =
+            line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
         bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
-        bool right_flanking = !valid_symbols[LAST_TOKEN_WHITESPACE] &&
-            (!valid_symbols[LAST_TOKEN_PUNCTUATION] || next_symbol_punctuation || next_symbol_whitespace);
-        bool left_flanking = !next_symbol_whitespace &&
-            (!next_symbol_punctuation || valid_symbols[LAST_TOKEN_PUNCTUATION] || valid_symbols[LAST_TOKEN_WHITESPACE]);
-        if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE] && right_flanking && (!left_flanking || next_symbol_punctuation)) {
+        bool right_flanking =
+            !valid_symbols[LAST_TOKEN_WHITESPACE] &&
+            (!valid_symbols[LAST_TOKEN_PUNCTUATION] ||
+             next_symbol_punctuation || next_symbol_whitespace);
+        bool left_flanking =
+            !next_symbol_whitespace && (!next_symbol_punctuation ||
+                                        valid_symbols[LAST_TOKEN_PUNCTUATION] ||
+                                        valid_symbols[LAST_TOKEN_WHITESPACE]);
+        if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE] && right_flanking &&
+            (!left_flanking || next_symbol_punctuation)) {
             s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
             lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
             return true;
         }
-        if (left_flanking && (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
+        if (left_flanking &&
+            (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
             s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
             lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
             return true;
@@ -316,25 +329,26 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
 }
 
 static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
-    // A normal tree-sitter rule decided that the current branch is invalid and now "requests"
-    // an error to stop the branch
+    // A normal tree-sitter rule decided that the current branch is invalid and
+    // now "requests" an error to stop the branch
     if (valid_symbols[TRIGGER_ERROR]) {
         return error(lexer);
     }
 
-    // Decide which tokens to consider based on the first non-whitespace character
+    // Decide which tokens to consider based on the first non-whitespace
+    // character
     switch (lexer->lookahead) {
         case '`':
-            // A backtick could mark the beginning or ending of a code span or a fenced
-            // code block.
+            // A backtick could mark the beginning or ending of a code span or a
+            // fenced code block.
             return parse_backtick(s, lexer, valid_symbols);
         case '$':
-            return parse_dollar(s,lexer, valid_symbols);
+            return parse_dollar(s, lexer, valid_symbols);
         case '*':
-            // A star could either mark the beginning or ending of emphasis, a list item or
-            // thematic break.
-            // This code is similar to the code for '_' and '+'.
-            return parse_star(s,lexer, valid_symbols);
+            // A star could either mark the beginning or ending of emphasis, a
+            // list item or thematic break. This code is similar to the code for
+            // '_' and '+'.
+            return parse_star(s, lexer, valid_symbols);
         case '_':
             return parse_underscore(s, lexer, valid_symbols);
         case '~':
@@ -350,27 +364,20 @@ void *tree_sitter_markdown_inline_external_scanner_create() {
 }
 
 bool tree_sitter_markdown_inline_external_scanner_scan(
-    void *payload,
-    TSLexer *lexer,
-    const bool *valid_symbols
-) {
+    void *payload, TSLexer *lexer, const bool *valid_symbols) {
     Scanner *scanner = (Scanner *)payload;
     return scan(scanner, lexer, valid_symbols);
 }
 
-unsigned tree_sitter_markdown_inline_external_scanner_serialize(
-    void *payload,
-    char* buffer
-) {
+unsigned tree_sitter_markdown_inline_external_scanner_serialize(void *payload,
+                                                                char *buffer) {
     Scanner *scanner = (Scanner *)payload;
     return serialize(scanner, buffer);
 }
 
-void tree_sitter_markdown_inline_external_scanner_deserialize(
-    void *payload,
-    char* buffer,
-    unsigned length
-) {
+void tree_sitter_markdown_inline_external_scanner_deserialize(void *payload,
+                                                              char *buffer,
+                                                              unsigned length) {
     Scanner *scanner = (Scanner *)payload;
     deserialize(scanner, buffer, length);
 }