diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index e67890d42..3e6fcc433 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -7,6 +7,7 @@
 from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
 from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
+from .visitors import Transformer
 
 if TYPE_CHECKING:
     from .parsers.lalr_analysis import ParseTableBase
@@ -153,8 +154,28 @@ def _validate_frontend_args(parser, lexer) -> None:
 
 def _get_lexer_callbacks(transformer, terminals):
+    """Collect the transformer's per-terminal token callbacks.
+
+    Each terminal is mapped to the transformer attribute that shares its
+    name. Terminals without such an attribute use the transformer's
+    ``__default_token__`` when it has been overridden, and are omitted
+    otherwise.
+
+    Returns a dict of ``{terminal name: callable}``.
+    """
     result = {}
+    # Tokens without a dedicated transformer method fall back to
+    # __default_token__, mirroring Transformer.transform(). The base
+    # implementation is a no-op, so it's only wired up when overridden,
+    # to avoid a needless call per token (see issue #1582).
+    #
+    # The check looks at the function behind the attribute actually found
+    # on the transformer, so a hook assigned on the instance counts as an
+    # override too, not only one defined on a subclass.
+    default_token = getattr(transformer, '__default_token__', None)
+    if getattr(default_token, '__func__', None) is Transformer.__default_token__:
+        default_token = None
     for terminal in terminals:
-        callback = getattr(transformer, terminal.name, None)
+        callback = getattr(transformer, terminal.name, default_token)
         if callback is not None:
             result[terminal.name] = callback
     return result
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 721e8693a..4f0eb784f 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -2770,6 +2770,40 @@ def __default__(self, data, children, meta):
             b = parser.parse(s)
             assert a == b
 
+        @unittest.skipIf(PARSER != 'lalr', "Embedded token callbacks are only applied by the lalr parser")
+        def test_default_token_in_treeless_mode(self):
+            # Regression test for issue #1582: an embedded transformer did not
+            # call __default_token__ on tokens, unlike Transformer.transform().
+            grammar = r"""
+                start: expr
+
+                expr: A B
+                    | A expr B
+
+                A: "a"
+                B: "b"
+
+                %import common.WS
+                %ignore WS
+            """
+            s = 'a a a b b b'
+
+            class AbTransformer(Transformer):
+                def __default_token__(self, token):
+                    return token.update(value=str(token).upper())
+
+            parser = _Lark(grammar)
+            a = AbTransformer().transform(parser.parse(s))
+            parser = _Lark(grammar, transformer=AbTransformer())
+            b = parser.parse(s)
+            assert a == b
+
+            # A hook assigned on the instance must be picked up as well.
+            hooked = Transformer()
+            hooked.__default_token__ = lambda token: token.update(value=str(token).upper())
+            c = _Lark(grammar, transformer=hooked).parse(s)
+            assert a == c
+
         @unittest.skipIf(PARSER != 'lalr', "strict mode is only supported in lalr for now")
         def test_strict(self):
             # Test regex collision