Browse Source

Only do a second pass if one would produce a different result.

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
4c0e4402b4
2 changed files with 27 additions and 20 deletions
  1. +14
    -12
      mwparserfromhell/parser/contexts.py
  2. +13
    -8
      mwparserfromhell/parser/tokenizer.py

+ 14
- 12
mwparserfromhell/parser/contexts.py View File

@@ -73,7 +73,8 @@ Local (stack-specific) contexts:

* :py:const:`STYLE_ITALICS`
* :py:const:`STYLE_BOLD`
* :py:const:`STYLE_PASS_2`
* :py:const:`STYLE_PASS_AGAIN`
* :py:const:`STYLE_SECOND_PASS`

* :py:const:`DL_TERM`

@@ -123,19 +124,20 @@ TAG_BODY = 1 << 16
TAG_CLOSE = 1 << 17
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE

STYLE_ITALICS = 1 << 18
STYLE_BOLD = 1 << 19
STYLE_PASS_2 = 1 << 20
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_2
STYLE_ITALICS = 1 << 18
STYLE_BOLD = 1 << 19
STYLE_PASS_AGAIN = 1 << 20
STYLE_SECOND_PASS = 1 << 21
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS

DL_TERM = 1 << 21
DL_TERM = 1 << 22

HAS_TEXT = 1 << 22
FAIL_ON_TEXT = 1 << 23
FAIL_NEXT = 1 << 24
FAIL_ON_LBRACE = 1 << 25
FAIL_ON_RBRACE = 1 << 26
FAIL_ON_EQUALS = 1 << 27
HAS_TEXT = 1 << 23
FAIL_ON_TEXT = 1 << 24
FAIL_NEXT = 1 << 25
FAIL_ON_LBRACE = 1 << 26
FAIL_ON_RBRACE = 1 << 27
FAIL_ON_EQUALS = 1 << 28
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
FAIL_ON_RBRACE + FAIL_ON_EQUALS)



+ 13
- 8
mwparserfromhell/parser/tokenizer.py View File

@@ -32,7 +32,9 @@ __all__ = ["Tokenizer"]

class BadRoute(Exception):
"""Raised internally when the current tokenization route is invalid.

The ``context`` attribute holds the value passed to the constructor
(``0`` when omitted), so an ``except BadRoute as route`` handler can
inspect ``route.context``.
"""
pass

def __init__(self, context=0):
self.context = context


class _TagOpenData(object):
@@ -132,8 +134,9 @@ class Tokenizer(object):
Discards the current stack/context/textbuffer and raises
:py:exc:`~.BadRoute`.
"""
context = self._context
self._pop()
raise BadRoute()
raise BadRoute(context)

def _emit(self, token):
"""Write a token to the end of the current token stack."""
@@ -646,9 +649,11 @@ class Tokenizer(object):
if context & contexts.STYLE_ITALICS:
try:
stack = self._parse(context)
except BadRoute: ## only if STYLE_PASS_AGAIN in destroyed context
except BadRoute as route:
if not route.context & contexts.STYLE_PASS_AGAIN:
raise
self._head = reset
stack = self._parse(context | contexts.STYLE_PASS_2)
stack = self._parse(context | contexts.STYLE_SECOND_PASS)
else:
stack = self._parse(context)

@@ -672,11 +677,11 @@ class Tokenizer(object):
self._really_parse_style(contexts.STYLE_BOLD, reset, "'''", "b")
except BadRoute:
self._head = reset
if self._context & contexts.STYLE_PASS_2:
if self._context & contexts.STYLE_SECOND_PASS:
self._emit_text("'")
return True
elif self._context & contexts.STYLE_ITALICS:
# Set STYLE_PASS_AGAIN
self._context |= contexts.STYLE_PASS_AGAIN
self._emit_text("'''")
else:
self._emit_text("'")
@@ -752,10 +757,10 @@ class Tokenizer(object):
return self._pop()
elif not self._can_recurse():
if ticks == 3:
if self._context & contexts.STYLE_PASS_2:
if self._context & contexts.STYLE_SECOND_PASS:
self._emit_text("'")
return self._pop()
# Set STYLE_PASS_AGAIN
self._context |= contexts.STYLE_PASS_AGAIN
self._emit_text("'" * ticks)
elif ticks == 2:
self._parse_italics()


Loading…
Cancel
Save