Przeglądaj źródła

Implement arguments in tokenizer - preliminary results are promising.

tags/v0.1
Ben Kurtovic 12 lat temu
rodzic
commit
c01f78aee1
3 zmienionych plików z 77 dodań i 30 usunięć
  1. +1
    -1
      mwparserfromhell/nodes/heading.py
  2. +31
    -22
      mwparserfromhell/parser/contexts.py
  3. +45
    -7
      mwparserfromhell/parser/tokenizer.py

+ 1
- 1
mwparserfromhell/nodes/heading.py Wyświetl plik

@@ -24,7 +24,7 @@ from __future__ import unicode_literals

from . import Node
from ..compat import str
from ...utils import parse_anything
from ..utils import parse_anything

__all__ = ["Heading"]



+ 31
- 22
mwparserfromhell/parser/contexts.py Wyświetl plik

@@ -35,20 +35,25 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``).

Local (stack-specific) contexts:

* :py:const:`TEMPLATE` (``0b000000111``)
* :py:const:`TEMPLATE` (``0b00000000111``)

* :py:const:`TEMPLATE_NAME` (``0b000000001``)
* :py:const:`TEMPLATE_PARAM_KEY` (``0b000000010``)
* :py:const:`TEMPLATE_PARAM_VALUE` (``0b000000100``)
* :py:const:`TEMPLATE_NAME` (``0b00000000001``)
* :py:const:`TEMPLATE_PARAM_KEY` (``0b00000000010``)
* :py:const:`TEMPLATE_PARAM_VALUE` (``0b00000000100``)

* :py:const:`ARGUMENT` (``0b00000011000``)

* :py:const:`ARGUMENT_NAME` (``0b00000001000``)
* :py:const:`ARGUMENT_DEFAULT` (``0b00000010000``)

* :py:const:`HEADING` (``0b11111100000``)

* :py:const:`HEADING_LEVEL_1` (``0b000001000``)
* :py:const:`HEADING_LEVEL_2` (``0b000010000``)
* :py:const:`HEADING_LEVEL_3` (``0b000100000``)
* :py:const:`HEADING_LEVEL_4` (``0b001000000``)
* :py:const:`HEADING_LEVEL_5` (``0b010000000``)
* :py:const:`HEADING_LEVEL_6` (``0b100000000``)
* :py:const:`HEADING_LEVEL_1` (``0b00000100000``)
* :py:const:`HEADING_LEVEL_2` (``0b00001000000``)
* :py:const:`HEADING_LEVEL_3` (``0b00010000000``)
* :py:const:`HEADING_LEVEL_4` (``0b00100000000``)
* :py:const:`HEADING_LEVEL_5` (``0b01000000000``)
* :py:const:`HEADING_LEVEL_6` (``0b10000000000``)

Global contexts:

@@ -57,18 +62,22 @@ Global contexts:

# Local contexts:

TEMPLATE = 0b000000111
TEMPLATE_NAME = 0b000000001
TEMPLATE_PARAM_KEY = 0b000000010
TEMPLATE_PARAM_VALUE = 0b000000100

HEADING = 0b111111000
HEADING_LEVEL_1 = 0b000001000
HEADING_LEVEL_2 = 0b000010000
HEADING_LEVEL_3 = 0b000100000
HEADING_LEVEL_4 = 0b001000000
HEADING_LEVEL_5 = 0b010000000
HEADING_LEVEL_6 = 0b100000000
TEMPLATE = 0b00000000111
TEMPLATE_NAME = 0b00000000001
TEMPLATE_PARAM_KEY = 0b00000000010
TEMPLATE_PARAM_VALUE = 0b00000000100

ARGUMENT = 0b00000011000
ARGUMENT_NAME = 0b00000001000
ARGUMENT_DEFAULT = 0b00000010000

HEADING = 0b11111100000
HEADING_LEVEL_1 = 0b00000100000
HEADING_LEVEL_2 = 0b00001000000
HEADING_LEVEL_3 = 0b00010000000
HEADING_LEVEL_4 = 0b00100000000
HEADING_LEVEL_5 = 0b01000000000
HEADING_LEVEL_6 = 0b10000000000


# Global contexts:


+ 45
- 7
mwparserfromhell/parser/tokenizer.py Wyświetl plik

@@ -135,10 +135,23 @@ class Tokenizer(object):
self._fail_route()
return self.END

def _parse_template(self):
def _parse_template_or_argument(self):
"""Parse a template at the head of the wikicode string."""
reset = self._head
self._head += 2

if self._read() == "{":
self._head += 1
try:
argument = self._parse(contexts.ARGUMENT_NAME)
except BadRoute:
pass
else:
self._write(tokens.ArgumentOpen())
self._write_all(argument)
self._write(tokens.ArgumentClose())
return

try:
template = self._parse(contexts.TEMPLATE_NAME)
except BadRoute:
@@ -181,12 +194,26 @@ class Tokenizer(object):
self._write(tokens.TemplateParamEquals())

def _handle_template_end(self):
"""Handle the end of the template at the head of the string."""
"""Handle the end of a template at the head of the string."""
if self._context & contexts.TEMPLATE_NAME:
self._verify_no_newlines()
self._head += 1
return self._pop()

def _handle_argument_separator(self):
    """Process the ``|`` that splits an argument's name from its default.

    Runs the newline verification first, then moves the current context
    from ``ARGUMENT_NAME`` to ``ARGUMENT_DEFAULT`` and writes an
    ``ArgumentSeparator`` token.
    """
    self._verify_no_newlines()
    # The dispatcher only calls this while ARGUMENT_NAME is set, so the
    # XOR below switches that bit off before the default bit is turned on.
    self._context = self._context ^ contexts.ARGUMENT_NAME
    self._context = self._context | contexts.ARGUMENT_DEFAULT
    separator = tokens.ArgumentSeparator()
    self._write(separator)

def _handle_argument_end(self):
    """Handle the end of an argument at the head of the string.

    Invoked by the main parse loop when the head sits on the first brace
    of a closing ``}}}`` while an ``ARGUMENT`` context is active. If the
    argument ended while still in its name (no ``|`` separator was seen),
    the newline verification is run here; when a default is present that
    verification has already been done by the separator handler.

    Returns whatever ``self._pop()`` returns for the finished argument.
    """
    # This handler is only reached in an ARGUMENT_* context, so the
    # name-phase check must test ARGUMENT_NAME. Testing TEMPLATE_NAME
    # (as the template-end handler does) would skip the verification
    # for arguments.
    if self._context & contexts.ARGUMENT_NAME:
        self._verify_no_newlines()
    # Skip two of the three closing braces.
    # NOTE(review): the third is presumably consumed by the caller's
    # normal head advance — confirm against the parse loop.
    self._head += 2
    return self._pop()

def _parse_heading(self):
"""Parse a section heading at the head of the wikicode string."""
self._global |= contexts.GL_HEADING
@@ -299,20 +326,31 @@ class Tokenizer(object):
self._head += 1
continue
if this is self.END:
if self._context & (contexts.TEMPLATE | contexts.HEADING):
fail = contexts.TEMPLATE | contexts.ARGUMENT | contexts.HEADING
if self._context & fail:
self._fail_route()
return self._pop()
prev, next = self._read(-1), self._read(1)
next = self._read(1)
if this == next == "{":
self._parse_template()
self._parse_template_or_argument()
elif this == "|" and self._context & contexts.TEMPLATE:
self._handle_template_param()
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
self._handle_template_param_value()
elif this == next == "}" and self._context & contexts.TEMPLATE:
return self._handle_template_end()
elif (prev == "\n" or prev == self.START) and this == "=" and not self._global & contexts.GL_HEADING:
self._parse_heading()
elif this == "|" and self._context & contexts.ARGUMENT_NAME:
self._handle_argument_separator()
elif this == next == "}" and self._context & contexts.ARGUMENT:
if self._read(2) == "}":
return self._handle_argument_end()
else:
self._write_text("}")
elif this == "=" and not self._global & contexts.GL_HEADING:
if self._read(-1) in ("\n", self.START):
self._parse_heading()
else:
self._write_text("=")
elif this == "=" and self._context & contexts.HEADING:
return self._handle_heading_end()
elif this == "\n" and self._context & contexts.HEADING:


Ładowanie…
Anuluj
Zapisz