Sfoglia il codice sorgente

Only do a second pass if one would produce a different result.

tags/v0.3
Ben Kurtovic 10 anni fa
parent
commit
4c0e4402b4
2 file modificati con 27 aggiunte e 20 eliminazioni
  1. +14
    -12
      mwparserfromhell/parser/contexts.py
  2. +13
    -8
      mwparserfromhell/parser/tokenizer.py

+ 14
- 12
mwparserfromhell/parser/contexts.py Vedi File

@@ -73,7 +73,8 @@ Local (stack-specific) contexts:


* :py:const:`STYLE_ITALICS` * :py:const:`STYLE_ITALICS`
* :py:const:`STYLE_BOLD` * :py:const:`STYLE_BOLD`
* :py:const:`STYLE_PASS_2`
* :py:const:`STYLE_PASS_AGAIN`
* :py:const:`STYLE_SECOND_PASS`


* :py:const:`DL_TERM` * :py:const:`DL_TERM`


@@ -123,19 +124,20 @@ TAG_BODY = 1 << 16
TAG_CLOSE = 1 << 17 TAG_CLOSE = 1 << 17
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE


STYLE_ITALICS = 1 << 18
STYLE_BOLD = 1 << 19
STYLE_PASS_2 = 1 << 20
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_2
STYLE_ITALICS = 1 << 18
STYLE_BOLD = 1 << 19
STYLE_PASS_AGAIN = 1 << 20
STYLE_SECOND_PASS = 1 << 21
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS


DL_TERM = 1 << 21
DL_TERM = 1 << 22


HAS_TEXT = 1 << 22
FAIL_ON_TEXT = 1 << 23
FAIL_NEXT = 1 << 24
FAIL_ON_LBRACE = 1 << 25
FAIL_ON_RBRACE = 1 << 26
FAIL_ON_EQUALS = 1 << 27
HAS_TEXT = 1 << 23
FAIL_ON_TEXT = 1 << 24
FAIL_NEXT = 1 << 25
FAIL_ON_LBRACE = 1 << 26
FAIL_ON_RBRACE = 1 << 27
FAIL_ON_EQUALS = 1 << 28
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
FAIL_ON_RBRACE + FAIL_ON_EQUALS) FAIL_ON_RBRACE + FAIL_ON_EQUALS)




+ 13
- 8
mwparserfromhell/parser/tokenizer.py Vedi File

@@ -32,7 +32,9 @@ __all__ = ["Tokenizer"]


class BadRoute(Exception): class BadRoute(Exception):
"""Raised internally when the current tokenization route is invalid.""" """Raised internally when the current tokenization route is invalid."""
pass

def __init__(self, context=0):
self.context = context




class _TagOpenData(object): class _TagOpenData(object):
@@ -132,8 +134,9 @@ class Tokenizer(object):
Discards the current stack/context/textbuffer and raises Discards the current stack/context/textbuffer and raises
:py:exc:`~.BadRoute`. :py:exc:`~.BadRoute`.
""" """
context = self._context
self._pop() self._pop()
raise BadRoute()
raise BadRoute(context)


def _emit(self, token): def _emit(self, token):
"""Write a token to the end of the current token stack.""" """Write a token to the end of the current token stack."""
@@ -646,9 +649,11 @@ class Tokenizer(object):
if context & contexts.STYLE_ITALICS: if context & contexts.STYLE_ITALICS:
try: try:
stack = self._parse(context) stack = self._parse(context)
except BadRoute: ## only if STYLE_PASS_AGAIN in destroyed context
except BadRoute as route:
if not route.context & contexts.STYLE_PASS_AGAIN:
raise
self._head = reset self._head = reset
stack = self._parse(context | contexts.STYLE_PASS_2)
stack = self._parse(context | contexts.STYLE_SECOND_PASS)
else: else:
stack = self._parse(context) stack = self._parse(context)


@@ -672,11 +677,11 @@ class Tokenizer(object):
self._really_parse_style(contexts.STYLE_BOLD, reset, "'''", "b") self._really_parse_style(contexts.STYLE_BOLD, reset, "'''", "b")
except BadRoute: except BadRoute:
self._head = reset self._head = reset
if self._context & contexts.STYLE_PASS_2:
if self._context & contexts.STYLE_SECOND_PASS:
self._emit_text("'") self._emit_text("'")
return True return True
elif self._context & contexts.STYLE_ITALICS: elif self._context & contexts.STYLE_ITALICS:
# Set STYLE_PASS_AGAIN
self._context |= contexts.STYLE_PASS_AGAIN
self._emit_text("'''") self._emit_text("'''")
else: else:
self._emit_text("'") self._emit_text("'")
@@ -752,10 +757,10 @@ class Tokenizer(object):
return self._pop() return self._pop()
elif not self._can_recurse(): elif not self._can_recurse():
if ticks == 3: if ticks == 3:
if self._context & contexts.STYLE_PASS_2:
if self._context & contexts.STYLE_SECOND_PASS:
self._emit_text("'") self._emit_text("'")
return self._pop() return self._pop()
# Set STYLE_PASS_AGAIN
self._context |= contexts.STYLE_PASS_AGAIN
self._emit_text("'" * ticks) self._emit_text("'" * ticks)
elif ticks == 2: elif ticks == 2:
self._parse_italics() self._parse_italics()


Caricamento…
Annulla
Salva