Skip to content

Commit 83d56df

Browse files
committed
refactor: fix some implicit casts, bug in push_block, & make global variables const/static
1 parent 936cc84 commit 83d56df

File tree

2 files changed

+109
-120
lines changed

2 files changed

+109
-120
lines changed

tree-sitter-markdown-inline/src/scanner.c

Lines changed: 42 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,20 @@ typedef enum {
2020
} TokenType;
2121

2222
// Determines if a character is punctuation as defined by the markdown spec.
23-
static bool is_punctuation(char c) {
23+
static bool is_punctuation(char chr) {
2424
return
25-
(c >= '!' && c <= '/') ||
26-
(c >= ':' && c <= '@') ||
27-
(c >= '[' && c <= '`') ||
28-
(c >= '{' && c <= '~');
29-
}
30-
31-
// Determines if a character is ascii whitespace as defined by the markdown spec.
32-
static bool is_whitespace(char c) {
33-
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
25+
(chr >= '!' && chr <= '/') ||
26+
(chr >= ':' && chr <= '@') ||
27+
(chr >= '[' && chr <= '`') ||
28+
(chr >= '{' && chr <= '~');
3429
}
3530

3631
// State bitflags used with `Scanner.state`
3732

3833
// TODO
39-
const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
34+
static const uint8_t STATE_EMPHASIS_DELIMITER_MOD_3 = 0x3;
4035
// Current delimiter run is opening
41-
const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;
36+
static const uint8_t STATE_EMPHASIS_DELIMITER_IS_OPEN = 0x1 << 2;
4237

4338
// Convenience function to emit the error token. This is done to stop invalid parse branches.
4439
// Specifically:
@@ -67,12 +62,12 @@ typedef struct {
6762

6863
// Write the whole state of a Scanner to a byte buffer
6964
static unsigned serialize(Scanner *s, char *buffer) {
70-
size_t i = 0;
71-
buffer[i++] = s->state;
72-
buffer[i++] = s->code_span_delimiter_length;
73-
buffer[i++] = s->latex_span_delimiter_length;
74-
buffer[i++] = s->num_emphasis_delimiters_left;
75-
return i;
65+
unsigned size = 0;
66+
buffer[size++] = (char)s->state;
67+
buffer[size++] = (char)s->code_span_delimiter_length;
68+
buffer[size++] = (char)s->latex_span_delimiter_length;
69+
buffer[size++] = (char)s->num_emphasis_delimiters_left;
70+
return size;
7671
}
7772

7873
// Read the whole state of a Scanner from a byte buffer
@@ -83,17 +78,17 @@ static void deserialize(Scanner *s, const char *buffer, unsigned length) {
8378
s->latex_span_delimiter_length = 0;
8479
s->num_emphasis_delimiters_left = 0;
8580
if (length > 0) {
86-
size_t i = 0;
87-
s->state = buffer[i++];
88-
s->code_span_delimiter_length = buffer[i++];
89-
s->latex_span_delimiter_length = buffer[i++];
90-
s->num_emphasis_delimiters_left = buffer[i++];
81+
size_t size = 0;
82+
s->state = (uint8_t)buffer[size++];
83+
s->code_span_delimiter_length = (uint8_t)buffer[size++];
84+
s->latex_span_delimiter_length = (uint8_t)buffer[size++];
85+
s->num_emphasis_delimiters_left = (uint8_t)buffer[size++];
9186
}
9287
}
9388

9489
static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, const bool *valid_symbols,
9590
const char delimiter, const TokenType open_token, const TokenType close_token) {
96-
size_t level = 0;
91+
uint8_t level = 0;
9792
while (lexer->lookahead == delimiter) {
9893
lexer->advance(lexer, false);
9994
level++;
@@ -103,7 +98,8 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
10398
*delimiter_length = 0;
10499
lexer->result_symbol = close_token;
105100
return true;
106-
} else if (valid_symbols[open_token]) {
101+
}
102+
if (valid_symbols[open_token]) {
107103
// Parse ahead to check if there is a closing delimiter
108104
size_t close_level = 0;
109105
while (!lexer->eof(lexer)) {
@@ -113,17 +109,17 @@ static bool parse_leaf_delimiter(TSLexer *lexer, uint8_t* delimiter_length, cons
113109
if (close_level == level) {
114110
// Found a matching delimiter
115111
break;
116-
} else {
117-
close_level = 0;
118112
}
113+
close_level = 0;
119114
}
120115
lexer->advance(lexer, false);
121116
}
122117
if (close_level == level) {
123118
*delimiter_length = level;
124119
lexer->result_symbol = open_token;
125120
return true;
126-
} else if (valid_symbols[UNCLOSED_SPAN]) {
121+
}
122+
if (valid_symbols[UNCLOSED_SPAN]) {
127123
lexer->result_symbol = UNCLOSED_SPAN;
128124
return true;
129125
}
@@ -153,15 +149,16 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
153149
lexer->result_symbol = EMPHASIS_OPEN_STAR;
154150
s->num_emphasis_delimiters_left--;
155151
return true;
156-
} else if (valid_symbols[EMPHASIS_CLOSE_STAR]) {
152+
}
153+
if (valid_symbols[EMPHASIS_CLOSE_STAR]) {
157154
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
158155
s->num_emphasis_delimiters_left--;
159156
return true;
160157
}
161158
}
162159
lexer->mark_end(lexer);
163160
// Otherwise count the number of stars
164-
size_t star_count = 1;
161+
uint8_t star_count = 1;
165162
while (lexer->lookahead == '*') {
166163
star_count++;
167164
lexer->advance(lexer, false);
@@ -174,7 +171,7 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
174171
// Look ahead to the next symbol (after the last star) to find out if it is whitespace
175172
// punctuation or other.
176173
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
177-
bool next_symbol_punctuation = is_punctuation(lexer->lookahead);
174+
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
178175
// Information about the last token is in valid_symbols. See grammar.js for these
179176
// tokens for how this is done.
180177
if (
@@ -189,7 +186,8 @@ static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
189186
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
190187
lexer->result_symbol = EMPHASIS_CLOSE_STAR;
191188
return true;
192-
} else if (
189+
}
190+
if (
193191
!next_symbol_whitespace && (
194192
!next_symbol_punctuation ||
195193
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
@@ -216,15 +214,16 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
216214
lexer->result_symbol = STRIKETHROUGH_OPEN;
217215
s->num_emphasis_delimiters_left--;
218216
return true;
219-
} else if (valid_symbols[STRIKETHROUGH_CLOSE]) {
217+
}
218+
if (valid_symbols[STRIKETHROUGH_CLOSE]) {
220219
lexer->result_symbol = STRIKETHROUGH_CLOSE;
221220
s->num_emphasis_delimiters_left--;
222221
return true;
223222
}
224223
}
225224
lexer->mark_end(lexer);
226225
// Otherwise count the number of tildes
227-
size_t star_count = 1;
226+
uint8_t star_count = 1;
228227
while (lexer->lookahead == '~') {
229228
star_count++;
230229
lexer->advance(lexer, false);
@@ -237,7 +236,7 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
237236
// Look ahead to the next symbol (after the last tilde) to find out if it is whitespace
238237
// punctuation or other.
239238
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
240-
bool next_symbol_punctuation = is_punctuation(lexer->lookahead);
239+
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
241240
// Information about the last token is in valid_symbols. See grammar.js for these
242241
// tokens for how this is done.
243242
if (
@@ -252,7 +251,8 @@ static bool parse_tilde(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
252251
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
253252
lexer->result_symbol = STRIKETHROUGH_CLOSE;
254253
return true;
255-
} else if (
254+
}
255+
if (
256256
!next_symbol_whitespace && (
257257
!next_symbol_punctuation ||
258258
valid_symbols[LAST_TOKEN_PUNCTUATION] ||
@@ -278,15 +278,16 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
278278
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
279279
s->num_emphasis_delimiters_left--;
280280
return true;
281-
} else if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
281+
}
282+
if (valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
282283
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
283284
s->num_emphasis_delimiters_left--;
284285
return true;
285286
}
286287
}
287288
lexer->mark_end(lexer);
288289
// Otherwise count the number of underscores
289-
size_t underscore_count = 1;
290+
uint8_t underscore_count = 1;
290291
while (lexer->lookahead == '_') {
291292
underscore_count++;
292293
lexer->advance(lexer, false);
@@ -295,7 +296,7 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
295296
if (valid_symbols[EMPHASIS_OPEN_UNDERSCORE] || valid_symbols[EMPHASIS_CLOSE_UNDERSCORE]) {
296297
s->num_emphasis_delimiters_left = underscore_count - 1;
297298
bool next_symbol_whitespace = line_end || lexer->lookahead == ' ' || lexer->lookahead == '\t';
298-
bool next_symbol_punctuation = is_punctuation(lexer->lookahead);
299+
bool next_symbol_punctuation = is_punctuation((char)lexer->lookahead);
299300
bool right_flanking = !valid_symbols[LAST_TOKEN_WHITESPACE] &&
300301
(!valid_symbols[LAST_TOKEN_PUNCTUATION] || next_symbol_punctuation || next_symbol_whitespace);
301302
bool left_flanking = !next_symbol_whitespace &&
@@ -304,7 +305,8 @@ static bool parse_underscore(Scanner *s, TSLexer *lexer, const bool *valid_symbo
304305
s->state &= ~STATE_EMPHASIS_DELIMITER_IS_OPEN;
305306
lexer->result_symbol = EMPHASIS_CLOSE_UNDERSCORE;
306307
return true;
307-
} else if (left_flanking && (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
308+
}
309+
if (left_flanking && (!right_flanking || valid_symbols[LAST_TOKEN_PUNCTUATION])) {
308310
s->state |= STATE_EMPHASIS_DELIMITER_IS_OPEN;
309311
lexer->result_symbol = EMPHASIS_OPEN_UNDERSCORE;
310312
return true;
@@ -326,22 +328,17 @@ static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
326328
// A backtick could mark the beginning or ending of a code span or a fenced
327329
// code block.
328330
return parse_backtick(s, lexer, valid_symbols);
329-
break;
330331
case '$':
331332
return parse_dollar(s,lexer, valid_symbols);
332-
break;
333333
case '*':
334334
// A star could either mark the beginning or ending of emphasis, a list item or
335335
// thematic break.
336336
// This code is similar to the code for '_' and '+'.
337337
return parse_star(s,lexer, valid_symbols);
338-
break;
339338
case '_':
340339
return parse_underscore(s, lexer, valid_symbols);
341-
break;
342340
case '~':
343341
return parse_tilde(s, lexer, valid_symbols);
344-
break;
345342
}
346343
return false;
347344
}

0 commit comments

Comments
 (0)