@@ -21,11 +21,8 @@ typedef enum {
2121
2222// Determines if a character is punctuation as defined by the markdown spec.
2323static bool is_punctuation (char chr ) {
24- return
25- (chr >= '!' && chr <= '/' ) ||
26- (chr >= ':' && chr <= '@' ) ||
27- (chr >= '[' && chr <= '`' ) ||
28- (chr >= '{' && chr <= '~' );
24+ return (chr >= '!' && chr <= '/' ) || (chr >= ':' && chr <= '@' ) ||
25+ (chr >= '[' && chr <= '`' ) || (chr >= '{' && chr <= '~' );
2926}
3027
3128// State bitflags used with `Scanner.state`
@@ -35,27 +32,30 @@ static const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
3532// Current delimiter run is opening
3633static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2 ;
3734
38- // Convenience function to emit the error token. This is done to stop invalid parse branches.
39- // Specifically:
40- // 1. When encountering a newline after a line break that ended a paragraph, and no new block
35+ // Convenience function to emit the error token. This is done to stop invalid
36+ // parse branches. Specifically:
37+ // 1. When encountering a newline after a line break that ended a paragraph, and
38+ // no new block
4139// has been opened.
4240// 2. When encountering a new block after a soft line break.
43- // 3. When a `$._trigger_error` token is valid, which is used to stop parse branches through
41+ // 3. When a `$._trigger_error` token is valid, which is used to stop parse
42+ // branches through
4443// normal tree-sitter grammar rules.
4544//
46- // See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in grammar.js
45+ // See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
46+ // grammar.js
4747static bool error (TSLexer * lexer ) {
4848 lexer -> result_symbol = ERROR ;
4949 return true;
5050}
5151
5252typedef struct {
53-
5453 // Parser state flags
5554 uint8_t state ;
5655 uint8_t code_span_delimiter_length ;
5756 uint8_t latex_span_delimiter_length ;
58- // The number of characters remaining in the current emphasis delimiter run.
57+ // The number of characters remaining in the current emphasis delimiter
58+ // run.
5959 uint8_t num_emphasis_delimiters_left ;
6060
6161} Scanner ;
@@ -86,8 +86,11 @@ static void deserialize(Scanner *s, const char *buffer, unsigned length) {
8686 }
8787}
8888
89- static bool parse_leaf_delimiter (TSLexer * lexer , uint8_t * delimiter_length , const bool * valid_symbols ,
90- const char delimiter , const TokenType open_token , const TokenType close_token ) {
89+ static bool parse_leaf_delimiter (TSLexer * lexer , uint8_t * delimiter_length ,
90+ const bool * valid_symbols ,
91+ const char delimiter ,
92+ const TokenType open_token ,
93+ const TokenType close_token ) {
9194 uint8_t level = 0 ;
9295 while (lexer -> lookahead == delimiter ) {
9396 lexer -> advance (lexer , false);
@@ -127,24 +130,30 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
127130 return false;
128131}
129132
130- static bool parse_backtick (Scanner * s , TSLexer * lexer , const bool * valid_symbols ) {
131- return parse_leaf_delimiter (lexer , & s -> code_span_delimiter_length , valid_symbols , '`' ,
132- CODE_SPAN_START , CODE_SPAN_CLOSE );
133+ static bool parse_backtick (Scanner * s , TSLexer * lexer ,
134+ const bool * valid_symbols ) {
135+ return parse_leaf_delimiter (lexer , & s -> code_span_delimiter_length ,
136+ valid_symbols , '`' , CODE_SPAN_START ,
137+ CODE_SPAN_CLOSE );
133138}
134139
135- static bool parse_dollar (Scanner * s , TSLexer * lexer , const bool * valid_symbols ) {
136- return parse_leaf_delimiter (lexer , & s -> latex_span_delimiter_length , valid_symbols , '$' ,
137- LATEX_SPAN_START , LATEX_SPAN_CLOSE );
140+ static bool parse_dollar (Scanner * s , TSLexer * lexer ,
141+ const bool * valid_symbols ) {
142+ return parse_leaf_delimiter (lexer , & s -> latex_span_delimiter_length ,
143+ valid_symbols , '$' , LATEX_SPAN_START ,
144+ LATEX_SPAN_CLOSE );
138145}
139146
140147static bool parse_star (Scanner * s , TSLexer * lexer , const bool * valid_symbols ) {
141148 lexer -> advance (lexer , false);
142- // If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
143- // part of an emphasis delimiter run, so interpret it as such.
149+ // If `num_emphasis_delimiters_left` is not zero then we already decided
150+ // that this should be part of an emphasis delimiter run, so interpret it as
151+ // such.
144152 if (s -> num_emphasis_delimiters_left > 0 ) {
145- // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it should be open
146- // or close.
147- if ((s -> state & STATE_EMPHASIS_DELIMITER_IS_OPEN ) && valid_symbols [EMPHASIS_OPEN_STAR ]) {
153+ // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
154+ // should be open or close.
155+ if ((s -> state & STATE_EMPHASIS_DELIMITER_IS_OPEN ) &&
156+ valid_symbols [EMPHASIS_OPEN_STAR ]) {
148157 s -> state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN );
149158 lexer -> result_symbol = EMPHASIS_OPEN_STAR ;
150159 s -> num_emphasis_delimiters_left -- ;
@@ -163,37 +172,32 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
163172 star_count ++ ;
164173 lexer -> advance (lexer , false);
165174 }
166- bool line_end = lexer -> lookahead == '\n' || lexer -> lookahead == '\r' || lexer -> eof (lexer );
167- if (valid_symbols [EMPHASIS_OPEN_STAR ] || valid_symbols [EMPHASIS_CLOSE_STAR ]) {
168- // The decision made for the first star also counts for all the following stars in the
169- // delimiter run. Remember how many there are.
175+ bool line_end = lexer -> lookahead == '\n' || lexer -> lookahead == '\r' ||
176+ lexer -> eof (lexer );
177+ if (valid_symbols [EMPHASIS_OPEN_STAR ] ||
178+ valid_symbols [EMPHASIS_CLOSE_STAR ]) {
179+ // The decision made for the first star also counts for all the
180+ // following stars in the delimiter run. Remember how many there are.
170181 s -> num_emphasis_delimiters_left = star_count - 1 ;
171- // Look ahead to the next symbol (after the last star) to find out if it is whitespace,
172- // punctuation or other.
173- bool next_symbol_whitespace = line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
182+ // Look ahead to the next symbol (after the last star) to find out if it
183+ // is whitespace, punctuation or other.
184+ bool next_symbol_whitespace =
185+ line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
174186 bool next_symbol_punctuation = is_punctuation ((char )lexer -> lookahead );
175- // Information about the last token is in valid_symbols. See grammar.js for these
176- // tokens for how this is done.
177- if (
178- valid_symbols [EMPHASIS_CLOSE_STAR ] &&
179- !valid_symbols [LAST_TOKEN_WHITESPACE ] && (
180- !valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
181- next_symbol_punctuation ||
182- next_symbol_whitespace
183- )
184- ) {
187+ // Information about the last token is in valid_symbols. See grammar.js
188+ // for these tokens for how this is done.
189+ if (valid_symbols [EMPHASIS_CLOSE_STAR ] &&
190+ !valid_symbols [LAST_TOKEN_WHITESPACE ] &&
191+ (!valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
192+ next_symbol_punctuation || next_symbol_whitespace )) {
185193 // Closing delimiters take precedence
186194 s -> state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN ;
187195 lexer -> result_symbol = EMPHASIS_CLOSE_STAR ;
188196 return true;
189197 }
190- if (
191- !next_symbol_whitespace && (
192- !next_symbol_punctuation ||
193- valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
194- valid_symbols [LAST_TOKEN_WHITESPACE ]
195- )
196- ) {
198+ if (!next_symbol_whitespace && (!next_symbol_punctuation ||
199+ valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
200+ valid_symbols [LAST_TOKEN_WHITESPACE ])) {
197201 s -> state |= STATE_EMPHASIS_DELIMITER_IS_OPEN ;
198202 lexer -> result_symbol = EMPHASIS_OPEN_STAR ;
199203 return true;
@@ -204,12 +208,14 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
204208
205209static bool parse_tilde (Scanner * s , TSLexer * lexer , const bool * valid_symbols ) {
206210 lexer -> advance (lexer , false);
207- // If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
208- // part of an emphasis delimiter run, so interpret it as such.
211+ // If `num_emphasis_delimiters_left` is not zero then we already decided
212+ // that this should be part of an emphasis delimiter run, so interpret it as
213+ // such.
209214 if (s -> num_emphasis_delimiters_left > 0 ) {
210- // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it should be open
211- // or close.
212- if ((s -> state & STATE_EMPHASIS_DELIMITER_IS_OPEN ) && valid_symbols [STRIKETHROUGH_OPEN ]) {
215+ // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
216+ // should be open or close.
217+ if ((s -> state & STATE_EMPHASIS_DELIMITER_IS_OPEN ) &&
218+ valid_symbols [STRIKETHROUGH_OPEN ]) {
213219 s -> state &= (~STATE_EMPHASIS_DELIMITER_IS_OPEN );
214220 lexer -> result_symbol = STRIKETHROUGH_OPEN ;
215221 s -> num_emphasis_delimiters_left -- ;
@@ -228,37 +234,32 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
228234 star_count ++ ;
229235 lexer -> advance (lexer , false);
230236 }
231- bool line_end = lexer -> lookahead == '\n' || lexer -> lookahead == '\r' || lexer -> eof (lexer );
232- if (valid_symbols [STRIKETHROUGH_OPEN ] || valid_symbols [STRIKETHROUGH_CLOSE ]) {
233- // The decision made for the first star also counts for all the following stars in the
234- // delimiter run. Remember how many there are.
237+ bool line_end = lexer -> lookahead == '\n' || lexer -> lookahead == '\r' ||
238+ lexer -> eof (lexer );
239+ if (valid_symbols [STRIKETHROUGH_OPEN ] ||
240+ valid_symbols [STRIKETHROUGH_CLOSE ]) {
241+ // The decision made for the first star also counts for all the
242+ // following stars in the delimiter run. Remember how many there are.
235243 s -> num_emphasis_delimiters_left = star_count - 1 ;
236- // Look ahead to the next symbol (after the last star) to find out if it is whitespace,
237- // punctuation or other.
238- bool next_symbol_whitespace = line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
244+ // Look ahead to the next symbol (after the last star) to find out if it
245+ // is whitespace, punctuation or other.
246+ bool next_symbol_whitespace =
247+ line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
239248 bool next_symbol_punctuation = is_punctuation ((char )lexer -> lookahead );
240- // Information about the last token is in valid_symbols. See grammar.js for these
241- // tokens for how this is done.
242- if (
243- valid_symbols [STRIKETHROUGH_CLOSE ] &&
244- !valid_symbols [LAST_TOKEN_WHITESPACE ] && (
245- !valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
246- next_symbol_punctuation ||
247- next_symbol_whitespace
248- )
249- ) {
249+ // Information about the last token is in valid_symbols. See grammar.js
250+ // for these tokens for how this is done.
251+ if (valid_symbols [STRIKETHROUGH_CLOSE ] &&
252+ !valid_symbols [LAST_TOKEN_WHITESPACE ] &&
253+ (!valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
254+ next_symbol_punctuation || next_symbol_whitespace )) {
250255 // Closing delimiters take precedence
251256 s -> state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN ;
252257 lexer -> result_symbol = STRIKETHROUGH_CLOSE ;
253258 return true;
254259 }
255- if (
256- !next_symbol_whitespace && (
257- !next_symbol_punctuation ||
258- valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
259- valid_symbols [LAST_TOKEN_WHITESPACE ]
260- )
261- ) {
260+ if (!next_symbol_whitespace && (!next_symbol_punctuation ||
261+ valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
262+ valid_symbols [LAST_TOKEN_WHITESPACE ])) {
262263 s -> state |= STATE_EMPHASIS_DELIMITER_IS_OPEN ;
263264 lexer -> result_symbol = STRIKETHROUGH_OPEN ;
264265 return true;
@@ -267,14 +268,17 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
267268 return false;
268269}
269270
270- static bool parse_underscore (Scanner * s , TSLexer * lexer , const bool * valid_symbols ) {
271+ static bool parse_underscore (Scanner * s , TSLexer * lexer ,
272+ const bool * valid_symbols ) {
271273 lexer -> advance (lexer , false);
272- // If `num_emphasis_delimiters_left` is not zero then we already decided that this should be
273- // part of an emphasis delimiter run, so interpret it as such.
274+ // If `num_emphasis_delimiters_left` is not zero then we already decided
275+ // that this should be part of an emphasis delimiter run, so interpret it as
276+ // such.
274277 if (s -> num_emphasis_delimiters_left > 0 ) {
275- // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it should be open
276- // or close.
277- if ((s -> state & STATE_EMPHASIS_DELIMITER_IS_OPEN ) && valid_symbols [EMPHASIS_OPEN_UNDERSCORE ]) {
278+ // The `STATE_EMPHASIS_DELIMITER_IS_OPEN` state flag tells us whether it
279+ // should be open or close.
280+ if ((s -> state & STATE_EMPHASIS_DELIMITER_IS_OPEN ) &&
281+ valid_symbols [EMPHASIS_OPEN_UNDERSCORE ]) {
278282 lexer -> result_symbol = EMPHASIS_OPEN_UNDERSCORE ;
279283 s -> num_emphasis_delimiters_left -- ;
280284 return true;
@@ -292,21 +296,30 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
292296 underscore_count ++ ;
293297 lexer -> advance (lexer , false);
294298 }
295- bool line_end = lexer -> lookahead == '\n' || lexer -> lookahead == '\r' || lexer -> eof (lexer );
296- if (valid_symbols [EMPHASIS_OPEN_UNDERSCORE ] || valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ]) {
299+ bool line_end = lexer -> lookahead == '\n' || lexer -> lookahead == '\r' ||
300+ lexer -> eof (lexer );
301+ if (valid_symbols [EMPHASIS_OPEN_UNDERSCORE ] ||
302+ valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ]) {
297303 s -> num_emphasis_delimiters_left = underscore_count - 1 ;
298- bool next_symbol_whitespace = line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
304+ bool next_symbol_whitespace =
305+ line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
299306 bool next_symbol_punctuation = is_punctuation ((char )lexer -> lookahead );
300- bool right_flanking = !valid_symbols [LAST_TOKEN_WHITESPACE ] &&
301- (!valid_symbols [LAST_TOKEN_PUNCTUATION ] || next_symbol_punctuation || next_symbol_whitespace );
302- bool left_flanking = !next_symbol_whitespace &&
303- (!next_symbol_punctuation || valid_symbols [LAST_TOKEN_PUNCTUATION ] || valid_symbols [LAST_TOKEN_WHITESPACE ]);
304- if (valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ] && right_flanking && (!left_flanking || next_symbol_punctuation )) {
307+ bool right_flanking =
308+ !valid_symbols [LAST_TOKEN_WHITESPACE ] &&
309+ (!valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
310+ next_symbol_punctuation || next_symbol_whitespace );
311+ bool left_flanking =
312+ !next_symbol_whitespace && (!next_symbol_punctuation ||
313+ valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
314+ valid_symbols [LAST_TOKEN_WHITESPACE ]);
315+ if (valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ] && right_flanking &&
316+ (!left_flanking || next_symbol_punctuation )) {
305317 s -> state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN ;
306318 lexer -> result_symbol = EMPHASIS_CLOSE_UNDERSCORE ;
307319 return true;
308320 }
309- if (left_flanking && (!right_flanking || valid_symbols [LAST_TOKEN_PUNCTUATION ])) {
321+ if (left_flanking &&
322+ (!right_flanking || valid_symbols [LAST_TOKEN_PUNCTUATION ])) {
310323 s -> state |= STATE_EMPHASIS_DELIMITER_IS_OPEN ;
311324 lexer -> result_symbol = EMPHASIS_OPEN_UNDERSCORE ;
312325 return true;
@@ -316,25 +329,26 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
316329}
317330
318331static bool scan (Scanner * s , TSLexer * lexer , const bool * valid_symbols ) {
319- // A normal tree-sitter rule decided that the current branch is invalid and now "requests"
320- // an error to stop the branch
332+ // A normal tree-sitter rule decided that the current branch is invalid and
333+ // now "requests" an error to stop the branch
321334 if (valid_symbols [TRIGGER_ERROR ]) {
322335 return error (lexer );
323336 }
324337
325- // Decide which tokens to consider based on the first non-whitespace character
338+ // Decide which tokens to consider based on the first non-whitespace
339+ // character
326340 switch (lexer -> lookahead ) {
327341 case '`' :
328- // A backtick could mark the beginning or ending of a code span or a fenced
329- // code block.
342+ // A backtick could mark the beginning or ending of a code span or a
343+ // fenced code block.
330344 return parse_backtick (s , lexer , valid_symbols );
331345 case '$' :
332- return parse_dollar (s ,lexer , valid_symbols );
346+ return parse_dollar (s , lexer , valid_symbols );
333347 case '*' :
334- // A star could either mark the beginning or ending of emphasis, a list item or
335- // thematic break.
336- // This code is similar to the code for '_' and '+'.
337- return parse_star (s ,lexer , valid_symbols );
348+ // A star could either mark the beginning or ending of emphasis, a
349+ // list item or thematic break. This code is similar to the code for
350+ // '_' and '+'.
351+ return parse_star (s , lexer , valid_symbols );
338352 case '_' :
339353 return parse_underscore (s , lexer , valid_symbols );
340354 case '~' :
@@ -350,27 +364,20 @@ void *tree_sitter_markdown_inline_external_scanner_create() {
350364}
351365
352366bool tree_sitter_markdown_inline_external_scanner_scan (
353- void * payload ,
354- TSLexer * lexer ,
355- const bool * valid_symbols
356- ) {
367+ void * payload , TSLexer * lexer , const bool * valid_symbols ) {
357368 Scanner * scanner = (Scanner * )payload ;
358369 return scan (scanner , lexer , valid_symbols );
359370}
360371
361- unsigned tree_sitter_markdown_inline_external_scanner_serialize (
362- void * payload ,
363- char * buffer
364- ) {
372+ unsigned tree_sitter_markdown_inline_external_scanner_serialize (void * payload ,
373+ char * buffer ) {
365374 Scanner * scanner = (Scanner * )payload ;
366375 return serialize (scanner , buffer );
367376}
368377
369- void tree_sitter_markdown_inline_external_scanner_deserialize (
370- void * payload ,
371- char * buffer ,
372- unsigned length
373- ) {
378+ void tree_sitter_markdown_inline_external_scanner_deserialize (void * payload ,
379+ char * buffer ,
380+ unsigned length ) {
374381 Scanner * scanner = (Scanner * )payload ;
375382 deserialize (scanner , buffer , length );
376383}
0 commit comments