@@ -20,25 +20,20 @@ typedef enum {
2020} TokenType ;
2121
2222// Determines if a character is punctuation as defined by the markdown spec.
23- static bool is_punctuation (char c ) {
23+ static bool is_punctuation (char chr ) {
2424 return
25- (c >= '!' && c <= '/' ) ||
26- (c >= ':' && c <= '@' ) ||
27- (c >= '[' && c <= '`' ) ||
28- (c >= '{' && c <= '~' );
29- }
30-
31- // Determines if a character is ascii whitespace as defined by the markdown spec.
32- static bool is_whitespace (char c ) {
33- return c == ' ' || c == '\t' || c == '\n' || c == '\r' ;
25+ (chr >= '!' && chr <= '/' ) ||
26+ (chr >= ':' && chr <= '@' ) ||
27+ (chr >= '[' && chr <= '`' ) ||
28+ (chr >= '{' && chr <= '~' );
3429}
3530
3631// State bitflags used with `Scanner.state`
3732
3833// TODO
39- const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3 ;
34+ static const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3 ;
4035// Current delimiter run is opening
41- const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2 ;
36+ static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2 ;
4237
4338// Convenience function to emit the error token. This is done to stop invalid parse branches.
4439// Specifically:
@@ -67,12 +62,12 @@ typedef struct {
6762
6863// Write the whole state of a Scanner to a byte buffer
6964static unsigned serialize (Scanner * s , char * buffer ) {
70- size_t i = 0 ;
71- buffer [i ++ ] = s -> state ;
72- buffer [i ++ ] = s -> code_span_delimiter_length ;
73- buffer [i ++ ] = s -> latex_span_delimiter_length ;
74- buffer [i ++ ] = s -> num_emphasis_delimiters_left ;
75- return i ;
65+ unsigned size = 0 ;
66+ buffer [size ++ ] = ( char ) s -> state ;
67+ buffer [size ++ ] = ( char ) s -> code_span_delimiter_length ;
68+ buffer [size ++ ] = ( char ) s -> latex_span_delimiter_length ;
69+ buffer [size ++ ] = ( char ) s -> num_emphasis_delimiters_left ;
70+ return size ;
7671}
7772
7873// Read the whole state of a Scanner from a byte buffer
@@ -83,17 +78,17 @@ static void deserialize(Scanner *s, const char *buffer, unsigned length) {
8378 s -> latex_span_delimiter_length = 0 ;
8479 s -> num_emphasis_delimiters_left = 0 ;
8580 if (length > 0 ) {
86- size_t i = 0 ;
87- s -> state = buffer [i ++ ];
88- s -> code_span_delimiter_length = buffer [i ++ ];
89- s -> latex_span_delimiter_length = buffer [i ++ ];
90- s -> num_emphasis_delimiters_left = buffer [i ++ ];
81+ size_t size = 0 ;
82+ s -> state = ( uint8_t ) buffer [size ++ ];
83+ s -> code_span_delimiter_length = ( uint8_t ) buffer [size ++ ];
84+ s -> latex_span_delimiter_length = ( uint8_t ) buffer [size ++ ];
85+ s -> num_emphasis_delimiters_left = ( uint8_t ) buffer [size ++ ];
9186 }
9287}
9388
9489static bool parse_leaf_delimiter (TSLexer * lexer , uint8_t * delimiter_length , const bool * valid_symbols ,
9590 const char delimiter , const TokenType open_token , const TokenType close_token ) {
96- size_t level = 0 ;
91+ uint8_t level = 0 ;
9792 while (lexer -> lookahead == delimiter ) {
9893 lexer -> advance (lexer , false);
9994 level ++ ;
@@ -103,7 +98,8 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
10398 * delimiter_length = 0 ;
10499 lexer -> result_symbol = close_token ;
105100 return true;
106- } else if (valid_symbols [open_token ]) {
101+ }
102+ if (valid_symbols [open_token ]) {
107103 // Parse ahead to check if there is a closing delimiter
108104 size_t close_level = 0 ;
109105 while (!lexer -> eof (lexer )) {
@@ -113,17 +109,17 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
113109 if (close_level == level ) {
114110 // Found a matching delimiter
115111 break ;
116- } else {
117- close_level = 0 ;
118112 }
113+ close_level = 0 ;
119114 }
120115 lexer -> advance (lexer , false);
121116 }
122117 if (close_level == level ) {
123118 * delimiter_length = level ;
124119 lexer -> result_symbol = open_token ;
125120 return true;
126- } else if (valid_symbols [UNCLOSED_SPAN ]) {
121+ }
122+ if (valid_symbols [UNCLOSED_SPAN ]) {
127123 lexer -> result_symbol = UNCLOSED_SPAN ;
128124 return true;
129125 }
@@ -153,15 +149,16 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
153149 lexer -> result_symbol = EMPHASIS_OPEN_STAR ;
154150 s -> num_emphasis_delimiters_left -- ;
155151 return true;
156- } else if (valid_symbols [EMPHASIS_CLOSE_STAR ]) {
152+ }
153+ if (valid_symbols [EMPHASIS_CLOSE_STAR ]) {
157154 lexer -> result_symbol = EMPHASIS_CLOSE_STAR ;
158155 s -> num_emphasis_delimiters_left -- ;
159156 return true;
160157 }
161158 }
162159 lexer -> mark_end (lexer );
163160 // Otherwise count the number of stars
164- size_t star_count = 1 ;
161+ uint8_t star_count = 1 ;
165162 while (lexer -> lookahead == '*' ) {
166163 star_count ++ ;
167164 lexer -> advance (lexer , false);
@@ -174,7 +171,7 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
174171 // Look ahead to the next symbol (after the last star) to find out if it is whitespace
175172 // punctuation or other.
176173 bool next_symbol_whitespace = line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
177- bool next_symbol_punctuation = is_punctuation (lexer -> lookahead );
174+ bool next_symbol_punctuation = is_punctuation (( char ) lexer -> lookahead );
178175 // Information about the last token is in valid_symbols. See grammar.js for these
179176 // tokens for how this is done.
180177 if (
@@ -189,7 +186,8 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
189186 s -> state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN ;
190187 lexer -> result_symbol = EMPHASIS_CLOSE_STAR ;
191188 return true;
192- } else if (
189+ }
190+ if (
193191 !next_symbol_whitespace && (
194192 !next_symbol_punctuation ||
195193 valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
@@ -216,15 +214,16 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
216214 lexer -> result_symbol = STRIKETHROUGH_OPEN ;
217215 s -> num_emphasis_delimiters_left -- ;
218216 return true;
219- } else if (valid_symbols [STRIKETHROUGH_CLOSE ]) {
217+ }
218+ if (valid_symbols [STRIKETHROUGH_CLOSE ]) {
220219 lexer -> result_symbol = STRIKETHROUGH_CLOSE ;
221220 s -> num_emphasis_delimiters_left -- ;
222221 return true;
223222 }
224223 }
225224 lexer -> mark_end (lexer );
226225 // Otherwise count the number of tildes
227- size_t star_count = 1 ;
226+ uint8_t star_count = 1 ;
228227 while (lexer -> lookahead == '~' ) {
229228 star_count ++ ;
230229 lexer -> advance (lexer , false);
@@ -237,7 +236,7 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
237236 // Look ahead to the next symbol (after the last tilde) to find out if it is whitespace
238237 // punctuation or other.
239238 bool next_symbol_whitespace = line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
240- bool next_symbol_punctuation = is_punctuation (lexer -> lookahead );
239+ bool next_symbol_punctuation = is_punctuation (( char ) lexer -> lookahead );
241240 // Information about the last token is in valid_symbols. See grammar.js for these
242241 // tokens for how this is done.
243242 if (
@@ -252,7 +251,8 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
252251 s -> state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN ;
253252 lexer -> result_symbol = STRIKETHROUGH_CLOSE ;
254253 return true;
255- } else if (
254+ }
255+ if (
256256 !next_symbol_whitespace && (
257257 !next_symbol_punctuation ||
258258 valid_symbols [LAST_TOKEN_PUNCTUATION ] ||
@@ -278,15 +278,16 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
278278 lexer -> result_symbol = EMPHASIS_OPEN_UNDERSCORE ;
279279 s -> num_emphasis_delimiters_left -- ;
280280 return true;
281- } else if (valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ]) {
281+ }
282+ if (valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ]) {
282283 lexer -> result_symbol = EMPHASIS_CLOSE_UNDERSCORE ;
283284 s -> num_emphasis_delimiters_left -- ;
284285 return true;
285286 }
286287 }
287288 lexer -> mark_end (lexer );
288289 // Otherwise count the number of underscores
289- size_t underscore_count = 1 ;
290+ uint8_t underscore_count = 1 ;
290291 while (lexer -> lookahead == '_' ) {
291292 underscore_count ++ ;
292293 lexer -> advance (lexer , false);
@@ -295,7 +296,7 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
295296 if (valid_symbols [EMPHASIS_OPEN_UNDERSCORE ] || valid_symbols [EMPHASIS_CLOSE_UNDERSCORE ]) {
296297 s -> num_emphasis_delimiters_left = underscore_count - 1 ;
297298 bool next_symbol_whitespace = line_end || lexer -> lookahead == ' ' || lexer -> lookahead == '\t' ;
298- bool next_symbol_punctuation = is_punctuation (lexer -> lookahead );
299+ bool next_symbol_punctuation = is_punctuation (( char ) lexer -> lookahead );
299300 bool right_flanking = !valid_symbols [LAST_TOKEN_WHITESPACE ] &&
300301 (!valid_symbols [LAST_TOKEN_PUNCTUATION ] || next_symbol_punctuation || next_symbol_whitespace );
301302 bool left_flanking = !next_symbol_whitespace &&
@@ -304,7 +305,8 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
304305 s -> state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN ;
305306 lexer -> result_symbol = EMPHASIS_CLOSE_UNDERSCORE ;
306307 return true;
307- } else if (left_flanking && (!right_flanking || valid_symbols [LAST_TOKEN_PUNCTUATION ])) {
308+ }
309+ if (left_flanking && (!right_flanking || valid_symbols [LAST_TOKEN_PUNCTUATION ])) {
308310 s -> state |= STATE_EMPHASIS_DELIMITER_IS_OPEN ;
309311 lexer -> result_symbol = EMPHASIS_OPEN_UNDERSCORE ;
310312 return true;
@@ -326,22 +328,17 @@ static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
326328 // A backtick could mark the beginning or ending of a code span or a fenced
327329 // code block.
328330 return parse_backtick (s , lexer , valid_symbols );
329- break ;
330331 case '$' :
331332 return parse_dollar (s ,lexer , valid_symbols );
332- break ;
333333 case '*' :
334334 // A star could either mark the beginning or ending of emphasis, a list item or
335335 // thematic break.
336336 // This code is similar to the code for '_' and '+'.
337337 return parse_star (s ,lexer , valid_symbols );
338- break ;
339338 case '_' :
340339 return parse_underscore (s , lexer , valid_symbols );
341- break ;
342340 case '~' :
343341 return parse_tilde (s , lexer , valid_symbols );
344- break ;
345342 }
346343 return false;
347344}
0 commit comments