Browse Source

Add support for bold and italic text (part one).

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
2a82a57b2d
2 changed files with 81 additions and 11 deletions
  1. +16
    -7
      mwparserfromhell/parser/contexts.py
  2. +65
    -4
      mwparserfromhell/parser/tokenizer.py

+ 16
- 7
mwparserfromhell/parser/contexts.py View File

@@ -69,6 +69,11 @@ Local (stack-specific) contexts:
* :py:const:`TAG_BODY`
* :py:const:`TAG_CLOSE`

* :py:const:`STYLE`

* :py:const:`STYLE_ITALICS`
* :py:const:`STYLE_BOLD`

* :py:const:`DL_TERM`

* :py:const:`SAFETY_CHECK`
@@ -117,14 +122,18 @@ TAG_BODY = 1 << 16
# Context flags: each constant occupies its own bit so several can be held in
# one integer and tested with ``&``.
TAG_CLOSE = 1 << 17
# Combined mask for the tag contexts. The other components are not assigned in
# this excerpt (TAG_BODY = 1 << 16 appears only in the hunk header).
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE

# NOTE(review): DL_TERM is assigned here and again further down. Read top to
# bottom, the later value (1 << 20) is the one that remains in effect; this
# first value uses the same bit (18) as STYLE_ITALICS.
DL_TERM = 1 << 18
# Wiki-markup styles: ``''`` (italics) and ``'''`` (bold).
STYLE_ITALICS = 1 << 18
STYLE_BOLD = 1 << 19
# Mask matching either style; the two bits are distinct, so ``+`` gives the
# same result as ``|`` here.
STYLE = STYLE_ITALICS + STYLE_BOLD

DL_TERM = 1 << 20

# NOTE(review): the six flags below are each assigned twice. The first group
# (bits 19-24) overlaps STYLE_BOLD and the second DL_TERM value; the second
# group (bits 21-26) is the one left in effect and does not overlap them.
HAS_TEXT = 1 << 19
FAIL_ON_TEXT = 1 << 20
FAIL_NEXT = 1 << 21
FAIL_ON_LBRACE = 1 << 22
FAIL_ON_RBRACE = 1 << 23
FAIL_ON_EQUALS = 1 << 24
HAS_TEXT = 1 << 21
FAIL_ON_TEXT = 1 << 22
FAIL_NEXT = 1 << 23
FAIL_ON_LBRACE = 1 << 24
FAIL_ON_RBRACE = 1 << 25
FAIL_ON_EQUALS = 1 << 26
# Mask covering all six flags above.
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
FAIL_ON_RBRACE + FAIL_ON_EQUALS)



+ 65
- 4
mwparserfromhell/parser/tokenizer.py View File

@@ -57,11 +57,11 @@ class Tokenizer(object):
USES_C = False
# Unique sentinel objects; they cannot compare equal to any character of the
# input text.
START = object()
END = object()
# NOTE(review): MARKERS is assigned twice. The second list is the one left in
# effect; it differs from the first only by the added "'" entry.
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":",
"/", "-", "\n", END]
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
":", "/", "-", "\n", END]
MAX_DEPTH = 40
MAX_CYCLES = 100000
# NOTE(review): regex is also assigned twice; the second pattern adds "'" to
# the character class and is the one left in effect. Each pattern captures a
# single character from the class.
regex = re.compile(r"([{}\[\]<>|=&#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
# Captures runs of whitespace, double quotes and backslashes.
tag_splitter = re.compile(r"([\s\"\\]+)")

def __init__(self):
@@ -629,6 +629,58 @@ class Tokenizer(object):
else:
self._emit_all(tag)

def _really_parse_style(self, context):
    """Parse a bold or italic span and emit it as a tag.

    *context* is expected to be :py:const:`contexts.STYLE_ITALICS` or
    :py:const:`contexts.STYLE_BOLD`; anything other than italics is emitted
    as bold. The span's contents are parsed first, so nothing is emitted if
    that parse fails. Raises :py:exc:`BadRoute`.
    """
    body = self._parse(context)
    if context == contexts.STYLE_ITALICS:
        markup, tag = "''", "i"
    else:
        markup, tag = "'''", "b"

    # Opening tag, carrying the wiki markup that produced it.
    self._emit(tokens.TagOpenOpen(wiki_markup=markup))
    self._emit_text(tag)
    self._emit(tokens.TagCloseOpen())

    # Everything that was parsed between the opening and closing ticks.
    self._emit_all(body)

    # Closing tag.
    self._emit(tokens.TagOpenClose())
    self._emit_text(tag)
    self._emit(tokens.TagCloseClose())

def _parse_style(self):
"""Parse wiki-style formatting (``''``/``'''`` for italics/bold)."""
# The visible caller invokes this only after checking ``this == next == "'"``,
# so the first two ticks are already known; skip past them.
self._head += 2
ticks = 2
# Count any further consecutive ticks, advancing the head once per tick.
while self._read() == "'":
self._head += 1
ticks += 1
# Position to rewind to if a parse attempt below fails.
reset = self._head

# More than five ticks: the surplus is emitted as literal text and the rest
# is treated as five. Exactly four: one tick is emitted as literal text and
# the remaining three are treated as bold.
if ticks > 5:
self._emit_text("'" * (ticks - 5))
ticks = 5
elif ticks == 4:
self._emit_text("'")
ticks = 3

# Five ticks are not handled yet.
if ticks == 5:
raise NotImplementedError()
# Three ticks: try bold first. If that route fails, emit one tick as text,
# rewind the head, and go on to try italics.
# NOTE(review): indentation is lost in this view; the italics attempt below
# must also run when ticks == 2, so it is presumably at function level rather
# than nested inside the ``except`` - confirm against the real file.
if ticks == 3:
try:
return self._really_parse_style(contexts.STYLE_BOLD)
except BadRoute:
self._emit_text("'")
self._head = reset
try:
self._really_parse_style(contexts.STYLE_ITALICS)
except BadRoute:
# Italics failed as well: emit the two ticks as plain text.
# NOTE(review): the head is set one before ``reset``, presumably because the
# caller advances the head after this method returns - confirm.
self._emit_text("''")
self._head = reset - 1

def _handle_style_end(self):
    """Handle the closing ``''`` or ``'''`` of an italic or bold span.

    Advances the head by one in an italics context and by two otherwise,
    emits a literal ``'`` for each further tick found one position ahead,
    and returns the result of :py:meth:`_pop`.
    """
    if self._context & contexts.STYLE_ITALICS:
        self._head += 1
    else:
        self._head += 2

    # Any additional ticks after the closing markup become plain text.
    while self._read(1) == "'":
        self._emit_text("'")
        self._head += 1

    return self._pop()

def _handle_list_marker(self):
"""Handle a list marker at the head (``#``, ``*``, ``;``, ``:``)."""
markup = self._read()
@@ -667,7 +719,8 @@ class Tokenizer(object):
def _handle_end(self):
"""Handle the end of the stream of wikitext."""
fail = (contexts.TEMPLATE | contexts.ARGUMENT | contexts.WIKILINK |
contexts.HEADING | contexts.COMMENT | contexts.TAG)
contexts.HEADING | contexts.COMMENT | contexts.TAG |
contexts.STYLE)
double_fail = (contexts.TEMPLATE_PARAM_KEY | contexts.TAG_CLOSE)
if self._context & fail:
if self._context & contexts.TAG_BODY:
@@ -817,6 +870,14 @@ class Tokenizer(object):
self._emit_text("<")
elif this == ">" and self._context & contexts.TAG_CLOSE:
return self._handle_tag_close_close()
elif this == next == "'":
if not self._context & contexts.STYLE and self._can_recurse():
self._parse_style()
elif (self._context & contexts.STYLE_ITALICS or
self._read(2) == "'" and self._context & contexts.STYLE_BOLD):
return self._handle_style_end()
else:
self._emit_text("'")
elif self._read(-1) in ("\n", self.START):
if this in ("#", "*", ";", ":"):
self._handle_list()


Loading…
Cancel
Save