diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py
index a7e70fa..d3f0254 100644
--- a/mwparserfromhell/parser/contexts.py
+++ b/mwparserfromhell/parser/contexts.py
@@ -73,7 +73,8 @@ Local (stack-specific) contexts:
 
     * :py:const:`STYLE_ITALICS`
     * :py:const:`STYLE_BOLD`
-    * :py:const:`STYLE_PASS_2`
+    * :py:const:`STYLE_PASS_AGAIN`
+    * :py:const:`STYLE_SECOND_PASS`
 
 * :py:const:`DL_TERM`
 
@@ -123,19 +124,20 @@ TAG_BODY = 1 << 16
 TAG_CLOSE = 1 << 17
 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE
 
-STYLE_ITALICS = 1 << 18
-STYLE_BOLD = 1 << 19
-STYLE_PASS_2 = 1 << 20
-STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_2
+STYLE_ITALICS = 1 << 18
+STYLE_BOLD = 1 << 19
+STYLE_PASS_AGAIN = 1 << 20
+STYLE_SECOND_PASS = 1 << 21
+STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS
 
-DL_TERM = 1 << 21
+DL_TERM = 1 << 22
 
-HAS_TEXT = 1 << 22
-FAIL_ON_TEXT = 1 << 23
-FAIL_NEXT = 1 << 24
-FAIL_ON_LBRACE = 1 << 25
-FAIL_ON_RBRACE = 1 << 26
-FAIL_ON_EQUALS = 1 << 27
+HAS_TEXT = 1 << 23
+FAIL_ON_TEXT = 1 << 24
+FAIL_NEXT = 1 << 25
+FAIL_ON_LBRACE = 1 << 26
+FAIL_ON_RBRACE = 1 << 27
+FAIL_ON_EQUALS = 1 << 28
 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
                 FAIL_ON_RBRACE + FAIL_ON_EQUALS)
 
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index 89481d8..4b9b9db 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -32,7 +32,9 @@ __all__ = ["Tokenizer"]
 
 class BadRoute(Exception):
     """Raised internally when the current tokenization route is invalid."""
-    pass
+
+    def __init__(self, context=0):
+        self.context = context
 
 
 class _TagOpenData(object):
@@ -132,8 +134,9 @@ class Tokenizer(object):
         Discards the current stack/context/textbuffer and raises
         :py:exc:`~.BadRoute`.
         """
+        context = self._context
         self._pop()
-        raise BadRoute()
+        raise BadRoute(context)
 
     def _emit(self, token):
         """Write a token to the end of the current token stack."""
@@ -646,9 +649,11 @@ class Tokenizer(object):
         if context & contexts.STYLE_ITALICS:
             try:
                 stack = self._parse(context)
-            except BadRoute:  ## only if STYLE_PASS_AGAIN in destroyed context
+            except BadRoute as route:
+                if not route.context & contexts.STYLE_PASS_AGAIN:
+                    raise
                 self._head = reset
-                stack = self._parse(context | contexts.STYLE_PASS_2)
+                stack = self._parse(context | contexts.STYLE_SECOND_PASS)
         else:
             stack = self._parse(context)
 
@@ -672,11 +677,11 @@ class Tokenizer(object):
             self._really_parse_style(contexts.STYLE_BOLD, reset, "'''", "b")
         except BadRoute:
             self._head = reset
-            if self._context & contexts.STYLE_PASS_2:
+            if self._context & contexts.STYLE_SECOND_PASS:
                 self._emit_text("'")
                 return True
             elif self._context & contexts.STYLE_ITALICS:
-                # Set STYLE_PASS_AGAIN
+                self._context |= contexts.STYLE_PASS_AGAIN
                 self._emit_text("'''")
             else:
                 self._emit_text("'")
@@ -752,10 +757,10 @@ class Tokenizer(object):
             return self._pop()
         elif not self._can_recurse():
             if ticks == 3:
-                if self._context & contexts.STYLE_PASS_2:
+                if self._context & contexts.STYLE_SECOND_PASS:
                     self._emit_text("'")
                     return self._pop()
-                # Set STYLE_PASS_AGAIN
+                self._context |= contexts.STYLE_PASS_AGAIN
                 self._emit_text("'" * ticks)
             elif ticks == 2:
                 self._parse_italics()
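
For reviewers: the pattern this patch introduces is that a failed route's context now travels with the BadRoute exception, so the caller retries with STYLE_SECOND_PASS only when the destroyed route had set STYLE_PASS_AGAIN. Below is a minimal standalone sketch of that pattern, not code from the tokenizer itself; the parse/parse_with_retry functions and their behavior are illustrative stand-ins, while BadRoute, STYLE_PASS_AGAIN, and STYLE_SECOND_PASS mirror the names added above.

    # Sketch only: simplified stand-ins for the tokenizer's retry logic.
    STYLE_PASS_AGAIN = 1 << 20
    STYLE_SECOND_PASS = 1 << 21


    class BadRoute(Exception):
        """Carries the context of the route that was destroyed."""

        def __init__(self, context=0):
            super().__init__()
            self.context = context


    def parse(context):
        # Pretend the first pass hits markup it cannot handle, flags the
        # context for another pass, and fails the route.
        if not context & STYLE_SECOND_PASS:
            raise BadRoute(context | STYLE_PASS_AGAIN)
        return "parsed on second pass"


    def parse_with_retry(context):
        try:
            return parse(context)
        except BadRoute as route:
            # Only reparse when the destroyed context asked for another pass;
            # otherwise propagate the failure as before.
            if not route.context & STYLE_PASS_AGAIN:
                raise
            return parse(context | STYLE_SECOND_PASS)


    print(parse_with_retry(0))  # -> "parsed on second pass"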