Browse Source

Support wikilinks in tokenizer.

tags/v0.1.1
Ben Kurtovic 11 years ago
parent
commit
99e466857b
2 changed files with 59 additions and 18 deletions
  1. +22
    -18
      mwparserfromhell/parser/contexts.py
  2. +37
    -0
      mwparserfromhell/parser/tokenizer.py

+ 22
- 18
mwparserfromhell/parser/contexts.py View File

@@ -64,24 +64,28 @@ Global contexts:

# Local contexts:

# --- Previous values (the 18 lines this commit removes) ---------------------
# 12-bit layout: template bits 0-2, argument bits 3-4, heading bits 5-10,
# comment bit 11. Each group name is the OR of its members.
TEMPLATE = 0b000000000111
TEMPLATE_NAME = 0b000000000001
TEMPLATE_PARAM_KEY = 0b000000000010
TEMPLATE_PARAM_VALUE = 0b000000000100

ARGUMENT = 0b000000011000
ARGUMENT_NAME = 0b000000001000
ARGUMENT_DEFAULT = 0b000000010000

HEADING = 0b011111100000
HEADING_LEVEL_1 = 0b000000100000
HEADING_LEVEL_2 = 0b000001000000
HEADING_LEVEL_3 = 0b000010000000
HEADING_LEVEL_4 = 0b000100000000
HEADING_LEVEL_5 = 0b001000000000
HEADING_LEVEL_6 = 0b010000000000

COMMENT = 0b100000000000
# --- New values (the 22 lines this commit adds) -----------------------------
# 14-bit layout: two wikilink bits are inserted at positions 5-6, which shifts
# the heading bits up to 7-12 and the comment bit up to 13. Template and
# argument bits keep their positions (0-2 and 3-4).
TEMPLATE = 0b00000000000111
TEMPLATE_NAME = 0b00000000000001
TEMPLATE_PARAM_KEY = 0b00000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000100

ARGUMENT = 0b00000000011000
ARGUMENT_NAME = 0b00000000001000
ARGUMENT_DEFAULT = 0b00000000010000

# WIKILINK == WIKILINK_TITLE | WIKILINK_TEXT, so `context & WIKILINK` is
# non-zero while either part of a wikilink is being parsed.
WIKILINK = 0b00000001100000
WIKILINK_TITLE = 0b00000000100000
WIKILINK_TEXT = 0b00000001000000

# HEADING is the OR of the six HEADING_LEVEL_* bits.
HEADING = 0b01111110000000
HEADING_LEVEL_1 = 0b00000010000000
HEADING_LEVEL_2 = 0b00000100000000
HEADING_LEVEL_3 = 0b00001000000000
HEADING_LEVEL_4 = 0b00010000000000
HEADING_LEVEL_5 = 0b00100000000000
HEADING_LEVEL_6 = 0b01000000000000

COMMENT = 0b10000000000000


# Global contexts:


+ 37
- 0
mwparserfromhell/parser/tokenizer.py View File

@@ -278,6 +278,34 @@ class Tokenizer(object):
self._head += 2
return self._pop()

def _parse_wikilink(self):
    """Try to tokenize a ``[[...]]`` wikilink starting at the current head.

    The head is moved past the opening ``[[`` and the link is parsed in
    the ``WIKILINK_TITLE`` context. On success the parsed tokens are
    written out wrapped in ``WikilinkOpen`` / ``WikilinkClose``. If the
    sub-parse raises ``BadRoute``, the head is rewound and ``[[`` is
    written as plain text instead.
    """
    # One past the position the head had on entry; this is where the head
    # is restored to if the sub-parse fails.
    fallback_head = self._head + 1
    self._head += 2
    try:
        link_tokens = self._parse(contexts.WIKILINK_TITLE)
    except BadRoute:
        self._head = fallback_head
        self._write_text("[[")
        return
    self._write(tokens.WikilinkOpen())
    self._write_all(link_tokens)
    self._write(tokens.WikilinkClose())

def _handle_wikilink_separator(self):
    """Switch from the title part of a wikilink to its text part.

    Runs ``_verify_safe`` over a fixed character list, flips the context
    from ``WIKILINK_TITLE`` to ``WIKILINK_TEXT`` and writes a
    ``WikilinkSeparator`` token.
    """
    # NOTE(review): presumably _verify_safe aborts the route when the title
    # read so far contains one of these characters -- confirm in _verify_safe.
    disallowed = ["\n", "{", "}", "[", "]"]
    self._verify_safe(disallowed)
    # XOR toggles the title bit; the visible caller only reaches this
    # method while that bit is set, so in that path it is cleared.
    self._context = self._context ^ contexts.WIKILINK_TITLE
    self._context = self._context | contexts.WIKILINK_TEXT
    self._write(tokens.WikilinkSeparator())

def _handle_wikilink_end(self):
    """Finish the wikilink currently being parsed and return ``_pop()``.

    When the context still has ``WIKILINK_TITLE`` set (no separator was
    handled), ``_verify_safe`` is run over the same character list used
    for the separator. The head is then advanced by one and the result of
    ``self._pop()`` is returned.
    """
    still_in_title = self._context & contexts.WIKILINK_TITLE
    if still_in_title:
        # NOTE(review): presumably rejects titles containing these
        # characters -- confirm in _verify_safe.
        disallowed = ["\n", "{", "}", "[", "]"]
        self._verify_safe(disallowed)
    self._head += 1
    return self._pop()

def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string."""
self._global |= contexts.GL_HEADING
@@ -431,6 +459,15 @@ class Tokenizer(object):
return self._handle_argument_end()
else:
self._write_text("}")
elif this == next == "[":
if not self._context & contexts.WIKILINK_TITLE:
self._parse_wikilink()
else:
self._write_text("[")
elif this == "|" and self._context & contexts.WIKILINK_TITLE:
self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end()
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()


Loading…
Cancel
Save