/*
 * tree-sitter-yts/src/tree_sitter/parser.h
 *
 * Contents of this header as of commit
 * a9a4ba60a37eb351d3ed872f1cfcf53e673be9ef
 *   Author:  Benedikt Peetz
 *   Date:    Sat, 14 Dec 2024 12:29:47 +0100
 *   Subject: feat(tree-sitter-yts): Update to new tree-sitter version &
 *            improve parsing
 * i.e. the added/context side of that patch, re-flowed into regular source
 * lines.
 *
 * NOTE(review): the patch hunk begins at line 14 of the file, so anything
 * that precedes the include guard in the repository is not reproduced here.
 * The opening `#ifndef` of the guard is implied by the closing `#endif` at
 * the bottom of the hunk.
 */
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

/*
 * NOTE(review): the header names of these three includes were lost when the
 * patch was extracted. <stdbool.h> and <stdint.h> are required by the `bool`
 * and fixed-width integer types used below; <stdlib.h> is presumed -- confirm
 * against the repository.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

/* These typedefs are skipped when TREE_SITTER_API_H_ is already defined. */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif

typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;

typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;

typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;

typedef struct TSLexer TSLexer;

/*
 * Lexer interface used by the lexer macros further down: they read
 * `lookahead`, call `advance` and `mark_end`, and store the recognised token
 * in `result_symbol`.
 */
struct TSLexer {
  int32_t lookahead;
  TSSymbol result_symbol;
  void (*advance)(TSLexer *, bool);
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
  void (*log)(const TSLexer *, const char *, ...);
};

/* Values stored in the `type` member of TSParseAction. */
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

/*
 * One parse action. Every alternative starts with a `uint8_t type`, so the
 * kind of action can be read through the bare `type` member.
 */
typedef union {
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

/* Element of the parse action table: either an action or a {count, reusable} entry. */
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;

/* Inclusive range of character codes: start <= c <= end (see set_contains). */
typedef struct {
  int32_t start;
  int32_t end;
} TSCharacterRange;

struct TSLanguage {
  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};

/*
 * Report whether `lookahead` lies inside one of the inclusive ranges in
 * `ranges[0 .. len-1]`.
 *
 * The table is bisected, so it must be sorted in ascending order for the
 * result to be meaningful.
 *
 * ranges     table of inclusive character ranges; only read, never written
 * len        number of entries in `ranges`
 * lookahead  character code to look up
 *
 * Returns true when some range contains `lookahead`, false otherwise. An
 * empty table (`len == 0`) contains nothing; `ranges` is not dereferenced in
 * that case.
 */
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  /* Without this guard the final probe below would read ranges[0] of an empty table. */
  if (len == 0) {
    return false;
  }
  uint32_t index = 0;
  uint32_t size = len;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      index = mid_index;
    }
    size -= half_size;
  }
  /* One candidate is left; it is the only range that can still match. */
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}

/*
 *  Lexer Macros
 *
 *  These expand inside a function that has a `TSLexer *lexer` and a `state`
 *  variable in scope.
 */

/* Marks the next declaration as intentionally unused (MSVC: suppresses warning 4101). */
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif

#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

/*
 * Arguments are a flat list of (character, state) pairs; the first pair whose
 * character equals `lookahead` selects the next state. Values are stored as
 * uint16_t.
 */
#define ADVANCE_MAP(...)                                              \
  {                                                                   \
    static const uint16_t map[] = { __VA_ARGS__ };                    \
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
      if (map[i] == lookahead) {                                      \
        state = map[i + 1];                                           \
        goto next_state;                                              \
      }                                                               \
    }                                                                 \
  }

#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

#define END_STATE() return result;

/*
 *  Parse Table Macros
 *
 *  SMALL_STATE relies on LARGE_STATE_COUNT, which is not defined in this
 *  header.
 */

#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

#define STATE(id) id

#define ACTIONS(id) id

#define SHIFT(state_value)              \
  {{                                    \
    .shift = {                          \
      .type = TSParseActionTypeShift,   \
      .state = (state_value)            \
    }                                   \
  }}

#define SHIFT_REPEAT(state_value)       \
  {{                                    \
    .shift = {                          \
      .type = TSParseActionTypeShift,   \
      .state = (state_value),           \
      .repetition = true                \
    }                                   \
  }}

#define SHIFT_EXTRA()                   \
  {{                                    \
    .shift = {                          \
      .type = TSParseActionTypeShift,   \
      .extra = true                     \
    }                                   \
  }}

#define REDUCE(symbol_name, children, precedence, prod_id) \
  {{                                                       \
    .reduce = {                                            \
      .type = TSParseActionTypeReduce,                     \
      .symbol = symbol_name,                               \
      .child_count = children,                             \
      .dynamic_precedence = precedence,                    \
      .production_id = prod_id                             \
    },                                                     \
  }}

#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_PARSER_H_