Browse Source

Merge branch 'develop' into feature/c_refactor

tags/v0.4.1
Ben Kurtovic 8 years ago
parent
commit
8b72c783f0
15 changed files with 209 additions and 127 deletions
  1. +2
    -1
      .travis.yml
  2. +5
    -0
      CHANGELOG
  3. +1
    -1
      README.rst
  4. +7
    -0
      docs/changelog.rst
  5. +1
    -1
      docs/integration.rst
  6. +58
    -53
      mwparserfromhell/nodes/template.py
  7. +2
    -1
      mwparserfromhell/parser/__init__.py
  8. +10
    -8
      mwparserfromhell/parser/contexts.py
  9. +11
    -10
      mwparserfromhell/parser/ctokenizer/contexts.h
  10. +32
    -20
      mwparserfromhell/parser/ctokenizer/tok_parse.c
  11. +22
    -13
      mwparserfromhell/parser/tokenizer.py
  12. +2
    -2
      tests/test_docs.py
  13. +12
    -3
      tests/test_template.py
  14. +7
    -0
      tests/tokenizer/integration.mwtest
  15. +37
    -14
      tests/tokenizer/templates.mwtest

+ 2
- 1
.travis.yml View File

@@ -5,7 +5,8 @@ python:
- 3.2
- 3.3
- 3.4
- nightly
- 3.5-dev
sudo: false
install:
- pip install coveralls
- python setup.py build


+ 5
- 0
CHANGELOG View File

@@ -6,6 +6,11 @@ v0.4.1 (unreleased):
- Added support for Python 3.5.
- '<' and '>' are now disallowed in wikilink titles and template names. This
includes when denoting tags, but not comments.
- Fixed the behavior of preserve_spacing in Template.add() and keep_field in
Template.remove() on parameters with hidden keys.
- Fixed parser bugs involving:
- templates with completely blank names;
- templates with newlines and comments.
- Heavy refactoring and fixes to the C tokenizer.
- Fixed some bugs in the release scripts.



+ 1
- 1
README.rst View File

@@ -139,7 +139,7 @@ If you're not using a library, you can parse any page using the following code
from urllib.parse import urlencode
from urllib.request import urlopen
import mwparserfromhell
API_URL = "http://en.wikipedia.org/w/api.php"
API_URL = "https://en.wikipedia.org/w/api.php"

def parse(title):
data = {"action": "query", "prop": "revisions", "rvlimit": 1,


+ 7
- 0
docs/changelog.rst View File

@@ -13,6 +13,13 @@ Unreleased
- Added support for Python 3.5.
- ``<`` and ``>`` are now disallowed in wikilink titles and template names.
This includes when denoting tags, but not comments.
- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and
*keep_field* in :func:`~.Template.remove` on parameters with hidden keys.
- Fixed parser bugs involving:

- templates with completely blank names;
- templates with newlines and comments.

- Heavy refactoring and fixes to the C tokenizer.
- Fixed some bugs in the release scripts.



+ 1
- 1
docs/integration.rst View File

@@ -25,7 +25,7 @@ If you're not using a library, you can parse any page using the following code
from urllib.parse import urlencode
from urllib.request import urlopen
import mwparserfromhell
API_URL = "http://en.wikipedia.org/w/api.php"
API_URL = "https://en.wikipedia.org/w/api.php"

def parse(title):
data = {"action": "query", "prop": "revisions", "rvlimit": 1,


+ 58
- 53
mwparserfromhell/nodes/template.py View File

@@ -82,21 +82,11 @@ class Template(Node):
if char in node:
code.replace(node, node.replace(char, replacement), False)

def _blank_param_value(self, value):
"""Remove the content from *value* while keeping its whitespace.

Replace *value*\ 's nodes with two text nodes, the first containing
whitespace from before its content and the second containing whitespace
from after its content.
"""
match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
value.nodes = [Text(match.group(1)), Text(match.group(2))]

def _select_theory(self, theories):
"""Return the most likely spacing convention given different options.

Given a dictionary of convention options as keys and their occurrence as
values, return the convention that occurs the most, or ``None`` if
Given a dictionary of convention options as keys and their occurrence
as values, return the convention that occurs the most, or ``None`` if
there is no clear preferred style.
"""
if theories:
@@ -129,34 +119,47 @@ class Template(Node):
after = self._select_theory(after_theories)
return before, after

def _remove_with_field(self, param, i, name):
"""Return True if a parameter name should be kept, otherwise False."""
if param.showkey:
following = self.params[i+1:]
better_matches = [after.name.strip() == name and not after.showkey for after in following]
if any(better_matches):
return False
return True
def _remove_without_field(self, param, i):
"""Return False if a parameter name should be kept, otherwise True."""
if not param.showkey:
dependents = [not after.showkey for after in self.params[i+1:]]
if any(dependents):
return False
return True
def _blank_param_value(self, value):
"""Remove the content from *value* while keeping its whitespace.
Replace *value*\ 's nodes with two text nodes, the first containing
whitespace from before its content and the second containing whitespace
from after its content.
"""
match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
value.nodes = [Text(match.group(1)), Text(match.group(2))]
def _fix_dependendent_params(self, i):
"""Unhide keys if necessary after removing the param at index *i*."""
if not self.params[i].showkey:
for param in self.params[i + 1:]:
if not param.showkey:
param.showkey = True

def _remove_exact(self, needle, keep_field):
"""Remove a specific parameter, *needle*, from the template."""
for i, param in enumerate(self.params):
if param is needle:
if keep_field or not self._remove_without_field(param, i):
if keep_field:
self._blank_param_value(param.value)
else:
self._fix_dependendent_params(i)
self.params.pop(i)
return
raise ValueError(needle)

def _should_remove(self, i, name):
"""Look ahead for a parameter with the same name, but hidden.

If one exists, we should remove the given one rather than blanking it.
"""
if self.params[i].showkey:
following = self.params[i + 1:]
better_matches = [after.name.strip() == name and not after.showkey
for after in following]
return any(better_matches)
return False

@property
def name(self):
"""The name of the template, as a :class:`.Wikicode` object."""
@@ -213,26 +216,25 @@ class Template(Node):
:func:`.utils.parse_anything`; pipes and equal signs are automatically
escaped from *value* when appropriate.

If *name* is already a parameter in the template, we'll replace its
value.

If *showkey* is given, this will determine whether or not to show the
parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of
``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent
guess.

If *name* is already a parameter in the template, we'll replace its
value while keeping the same whitespace around it. We will also try to
guess the dominant spacing convention when adding a new parameter using
:meth:`_get_spacing_conventions`.

If *before* is given (either a :class:`.Parameter` object or a name),
then we will place the parameter immediately before this one.
Otherwise, it will be added at the end. If *before* is a name and
exists multiple times in the template, we will place it before the last
occurrence. If *before* is not in the template, :exc:`ValueError` is
raised. The argument is ignored if the new parameter already exists.
raised. The argument is ignored if *name* is an existing parameter.

If *preserve_spacing* is ``False``, we will avoid preserving spacing
conventions when changing the value of an existing parameter or when
adding a new one.
If *preserve_spacing* is ``True``, we will try to preserve whitespace
conventions around the parameter, whether it is new or we are updating
an existing value. It is disabled for parameters with hidden keys,
since MediaWiki doesn't strip whitespace in this case.
"""
name, value = parse_anything(name), parse_anything(value)
self._surface_escape(value, "|")
@@ -245,7 +247,7 @@ class Template(Node):
if not existing.showkey:
self._surface_escape(value, "=")
nodes = existing.value.nodes
if preserve_spacing:
if preserve_spacing and existing.showkey:
for i in range(2): # Ignore empty text nodes
if not nodes[i]:
nodes[i] = None
@@ -271,7 +273,7 @@ class Template(Node):
if not showkey:
self._surface_escape(value, "=")

if preserve_spacing:
if preserve_spacing and showkey:
before_n, after_n = self._get_spacing_conventions(use_names=True)
before_v, after_v = self._get_spacing_conventions(use_names=False)
name = parse_anything([before_n, name, after_n])
@@ -294,36 +296,39 @@ class Template(Node):
and :meth:`get`.

If *keep_field* is ``True``, we will keep the parameter's name, but
blank its value. Otherwise, we will remove the parameter completely
*unless* other parameters are dependent on it (e.g. removing ``bar``
from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what
we expected, so ``{{foo||baz}}`` will be produced instead).
blank its value. Otherwise, we will remove the parameter completely.

When removing a parameter with a hidden name, subsequent parameters
with hidden names will be made visible. For example, removing ``bar``
from ``{{foo|bar|baz}}`` produces ``{{foo|2=baz}}`` because
``{{foo|baz}}`` is incorrect.

If the parameter shows up multiple times in the template and *param* is
not a :class:`.Parameter` object, we will remove all instances of it
(and keep only one if *keep_field* is ``True`` - the first instance if
none have dependents, otherwise the one with dependents will be kept).
(and keep only one if *keep_field* is ``True`` - either the one with a
hidden name, if it exists, or the first instance).
"""
if isinstance(param, Parameter):
return self._remove_exact(param, keep_field)

name = str(param).strip()
removed = False
to_remove = []

for i, param in enumerate(self.params):
if param.name.strip() == name:
if keep_field:
if self._remove_with_field(param, i, name):
self._blank_param_value(param.value)
keep_field = False
else:
to_remove.append(i)
else:
if self._remove_without_field(param, i):
if self._should_remove(i, name):
to_remove.append(i)
else:
self._blank_param_value(param.value)
keep_field = False
else:
self._fix_dependendent_params(i)
to_remove.append(i)
if not removed:
removed = True

if not removed:
raise ValueError(name)
for i in reversed(to_remove):


+ 2
- 1
mwparserfromhell/parser/__init__.py View File

@@ -40,11 +40,11 @@ class ParserError(Exception):


from .builder import Builder
from .tokenizer import Tokenizer
try:
from ._tokenizer import CTokenizer
use_c = True
except ImportError:
from .tokenizer import Tokenizer
CTokenizer = None
use_c = False

@@ -70,6 +70,7 @@ class Parser(object):
if use_c and CTokenizer:
self._tokenizer = CTokenizer()
else:
from .tokenizer import Tokenizer
self._tokenizer = Tokenizer()
self._builder = Builder()



+ 10
- 8
mwparserfromhell/parser/contexts.py View File

@@ -89,6 +89,7 @@ Local (stack-specific) contexts:
* :const:`FAIL_ON_LBRACE`
* :const:`FAIL_ON_RBRACE`
* :const:`FAIL_ON_EQUALS`
* :const:`HAS_TEMPLATE`

* :const:`TABLE`

@@ -161,15 +162,16 @@ FAIL_NEXT = 1 << 26
FAIL_ON_LBRACE = 1 << 27
FAIL_ON_RBRACE = 1 << 28
FAIL_ON_EQUALS = 1 << 29
HAS_TEMPLATE = 1 << 30
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
FAIL_ON_RBRACE + FAIL_ON_EQUALS)
TABLE_OPEN = 1 << 30
TABLE_CELL_OPEN = 1 << 31
TABLE_CELL_STYLE = 1 << 32
TABLE_ROW_OPEN = 1 << 33
TABLE_TD_LINE = 1 << 34
TABLE_TH_LINE = 1 << 35
FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE)
TABLE_OPEN = 1 << 31
TABLE_CELL_OPEN = 1 << 32
TABLE_CELL_STYLE = 1 << 33
TABLE_ROW_OPEN = 1 << 34
TABLE_TD_LINE = 1 << 35
TABLE_TH_LINE = 1 << 36
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
TABLE_TD_LINE + TABLE_TH_LINE)


+ 11
- 10
mwparserfromhell/parser/ctokenizer/contexts.h View File

@@ -63,22 +63,23 @@ SOFTWARE.

#define LC_DLTERM 0x0000000000800000

#define LC_SAFETY_CHECK 0x000000003F000000
#define LC_SAFETY_CHECK 0x000000007F000000
#define LC_HAS_TEXT 0x0000000001000000
#define LC_FAIL_ON_TEXT 0x0000000002000000
#define LC_FAIL_NEXT 0x0000000004000000
#define LC_FAIL_ON_LBRACE 0x0000000008000000
#define LC_FAIL_ON_RBRACE 0x0000000010000000
#define LC_FAIL_ON_EQUALS 0x0000000020000000

#define LC_TABLE 0x0000000FC0000000
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000
#define LC_TABLE_OPEN 0x0000000040000000
#define LC_TABLE_CELL_OPEN 0x0000000080000000
#define LC_TABLE_CELL_STYLE 0x0000000100000000
#define LC_TABLE_ROW_OPEN 0x0000000200000000
#define LC_TABLE_TD_LINE 0x0000000400000000
#define LC_TABLE_TH_LINE 0x0000000800000000
#define LC_HAS_TEMPLATE 0x0000000040000000

#define LC_TABLE 0x0000001F80000000
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000
#define LC_TABLE_OPEN 0x0000000080000000
#define LC_TABLE_CELL_OPEN 0x0000000100000000
#define LC_TABLE_CELL_STYLE 0x0000000200000000
#define LC_TABLE_ROW_OPEN 0x0000000400000000
#define LC_TABLE_TD_LINE 0x0000000800000000
#define LC_TABLE_TH_LINE 0x0000001000000000

/* Global contexts */



+ 32
- 20
mwparserfromhell/parser/ctokenizer/tok_parse.c View File

@@ -121,12 +121,16 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr)
/*
Parse a template at the head of the wikicode string.
*/
static int Tokenizer_parse_template(Tokenizer* self)
static int Tokenizer_parse_template(Tokenizer* self, int has_content)
{
PyObject *template;
Py_ssize_t reset = self->head;
uint64_t context = LC_TEMPLATE_NAME;

template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1);
if (has_content)
context |= LC_HAS_TEMPLATE;

template = Tokenizer_parse(self, context, 1);
if (BAD_ROUTE) {
self->head = reset;
return 0;
@@ -182,6 +186,7 @@ static int Tokenizer_parse_argument(Tokenizer* self)
static int Tokenizer_parse_template_or_argument(Tokenizer* self)
{
unsigned int braces = 2, i;
int has_content = 0;
PyObject *tokenlist;

self->head += 2;
@@ -198,7 +203,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
return 0;
}
if (braces == 2) {
if (Tokenizer_parse_template(self))
if (Tokenizer_parse_template(self, has_content))
return -1;
if (BAD_ROUTE) {
RESET_ROUTE();
@@ -212,7 +217,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
return -1;
if (BAD_ROUTE) {
RESET_ROUTE();
if (Tokenizer_parse_template(self))
if (Tokenizer_parse_template(self, has_content))
return -1;
if (BAD_ROUTE) {
char text[MAX_BRACES + 1];
@@ -228,8 +233,10 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
}
else
braces -= 3;
if (braces)
if (braces) {
has_content = 1;
self->head++;
}
}
tokenlist = Tokenizer_pop(self);
if (!tokenlist)
@@ -251,8 +258,13 @@ static int Tokenizer_handle_template_param(Tokenizer* self)
{
PyObject *stack;

if (self->topstack->context & LC_TEMPLATE_NAME)
if (self->topstack->context & LC_TEMPLATE_NAME) {
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) {
Tokenizer_fail_route(self);
return -1;
}
self->topstack->context ^= LC_TEMPLATE_NAME;
}
else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE)
self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE;
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
@@ -303,7 +315,11 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self)
{
PyObject* stack;

if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
if (self->topstack->context & LC_TEMPLATE_NAME) {
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE)))
return Tokenizer_fail_route(self);
}
else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
stack = Tokenizer_pop_keeping_context(self);
if (!stack)
return NULL;
@@ -2428,30 +2444,26 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
if (context & LC_TAG_CLOSE)
return (data == '<') ? -1 : 0;
if (context & LC_TEMPLATE_NAME) {
if (data == '{' || data == '}' || data == '[') {
if (data == '{') {
self->topstack->context |= LC_HAS_TEMPLATE | LC_FAIL_NEXT;
return 0;
}
if (data == '}' || (data == '<' && Tokenizer_READ(self, 1) == '!')) {
self->topstack->context |= LC_FAIL_NEXT;
return 0;
}
if (data == ']' || data == '>' || (data == '<' &&
Tokenizer_READ(self, 1) != '!')) {
if (data == '[' || data == ']' || data == '<' || data == '>') {
return -1;
}
if (data == '|')
return 0;
if (context & LC_HAS_TEXT) {
if (context & LC_FAIL_ON_TEXT) {
if (!Py_UNICODE_ISSPACE(data)) {
if (data == '<' && Tokenizer_READ(self, 1) == '!') {
self->topstack->context |= LC_FAIL_NEXT;
return 0;
}
if (!Py_UNICODE_ISSPACE(data))
return -1;
}
}
else {
if (data == '\n')
self->topstack->context |= LC_FAIL_ON_TEXT;
}
else if (data == '\n')
self->topstack->context |= LC_FAIL_ON_TEXT;
}
else if (!Py_UNICODE_ISSPACE(data))
self->topstack->context |= LC_HAS_TEXT;


+ 22
- 13
mwparserfromhell/parser/tokenizer.py View File

@@ -192,11 +192,14 @@ class Tokenizer(object):
self._fail_route()
return self.END

def _parse_template(self):
def _parse_template(self, has_content):
"""Parse a template at the head of the wikicode string."""
reset = self._head
context = contexts.TEMPLATE_NAME
if has_content:
context |= contexts.HAS_TEMPLATE
try:
template = self._parse(contexts.TEMPLATE_NAME)
template = self._parse(context)
except BadRoute:
self._head = reset
raise
@@ -223,6 +226,7 @@ class Tokenizer(object):
while self._read() == "{":
self._head += 1
braces += 1
has_content = False
self._push()

while braces:
@@ -230,7 +234,7 @@ class Tokenizer(object):
return self._emit_text_then_stack("{")
if braces == 2:
try:
self._parse_template()
self._parse_template(has_content)
except BadRoute:
return self._emit_text_then_stack("{{")
break
@@ -239,11 +243,12 @@ class Tokenizer(object):
braces -= 3
except BadRoute:
try:
self._parse_template()
self._parse_template(has_content)
braces -= 2
except BadRoute:
return self._emit_text_then_stack("{" * braces)
if braces:
has_content = True
self._head += 1

self._emit_all(self._pop())
@@ -253,6 +258,8 @@ class Tokenizer(object):
def _handle_template_param(self):
"""Handle a template parameter at the head of the string."""
if self._context & contexts.TEMPLATE_NAME:
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE):
self._fail_route()
self._context ^= contexts.TEMPLATE_NAME
elif self._context & contexts.TEMPLATE_PARAM_VALUE:
self._context ^= contexts.TEMPLATE_PARAM_VALUE
@@ -271,7 +278,10 @@ class Tokenizer(object):

def _handle_template_end(self):
"""Handle the end of a template at the head of the string."""
if self._context & contexts.TEMPLATE_PARAM_KEY:
if self._context & contexts.TEMPLATE_NAME:
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE):
self._fail_route()
elif self._context & contexts.TEMPLATE_PARAM_KEY:
self._emit_all(self._pop(keep_context=True))
self._head += 1
return self._pop()
@@ -1183,23 +1193,22 @@ class Tokenizer(object):
elif context & contexts.EXT_LINK_TITLE:
return this != "\n"
elif context & contexts.TEMPLATE_NAME:
if this == "{" or this == "}" or this == "[":
if this == "{":
self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT
return True
if this == "}" or (this == "<" and self._read(1) == "!"):
self._context |= contexts.FAIL_NEXT
return True
if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"):
if this == "[" or this == "]" or this == "<" or this == ">":
return False
if this == "|":
return True
if context & contexts.HAS_TEXT:
if context & contexts.FAIL_ON_TEXT:
if this is self.END or not this.isspace():
if this == "<" and self._read(1) == "!":
self._context |= contexts.FAIL_NEXT
return True
return False
else:
if this == "\n":
self._context |= contexts.FAIL_ON_TEXT
elif this == "\n":
self._context |= contexts.FAIL_ON_TEXT
elif this is self.END or not this.isspace():
self._context |= contexts.HAS_TEXT
return True


+ 2
- 2
tests/test_docs.py View File

@@ -115,8 +115,8 @@ class TestDocs(unittest.TestCase):
@unittest.skipIf("NOWEB" in os.environ, "web test disabled by environ var")
def test_readme_5(self):
"""test a block of example code in the README; includes a web call"""
url1 = "http://en.wikipedia.org/w/api.php"
url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw"
url1 = "https://en.wikipedia.org/w/api.php"
url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw"
title = "Test"
data = {"action": "query", "prop": "revisions", "rvlimit": 1,
"rvprop": "content", "format": "json", "titles": title}


+ 12
- 3
tests/test_template.py View File

@@ -213,6 +213,9 @@ class TestTemplate(TreeEqualityTestCase):
pgens("f", "g")])
node37 = Template(wraptext("a"), [pgenh("1", "")])
node38 = Template(wraptext("abc"))
node39 = Template(wraptext("a"), [pgenh("1", " b ")])
node40 = Template(wraptext("a"), [pgenh("1", " b"), pgenh("2", " c")])
node41 = Template(wraptext("a"), [pgens("1", " b"), pgens("2", " c")])

node1.add("e", "f", showkey=True)
node2.add(2, "g", showkey=False)
@@ -255,6 +258,9 @@ class TestTemplate(TreeEqualityTestCase):
node37.add(1, "b")
node38.add("1", "foo")
self.assertRaises(ValueError, node38.add, "z", "bar", showkey=False)
node39.add("1", "c")
node40.add("3", "d")
node41.add("3", "d")

self.assertEqual("{{a|b=c|d|e=f}}", node1)
self.assertEqual("{{a|b=c|d|g}}", node2)
@@ -299,6 +305,9 @@ class TestTemplate(TreeEqualityTestCase):
self.assertEqual("{{a|b=c|d=h|f=g}}", node36)
self.assertEqual("{{a|b}}", node37)
self.assertEqual("{{abc|foo}}", node38)
self.assertEqual("{{a|c}}", node39)
self.assertEqual("{{a| b| c|d}}", node40)
self.assertEqual("{{a|1= b|2= c|3= d}}", node41)

def test_remove(self):
"""test Template.remove()"""
@@ -395,13 +404,13 @@ class TestTemplate(TreeEqualityTestCase):
self.assertRaises(ValueError, node2.remove, "1")
self.assertEqual("{{foo}}", node2)
self.assertEqual("{{foo||abc=}}", node3)
self.assertEqual("{{foo||baz}}", node4)
self.assertEqual("{{foo|2=baz}}", node4)
self.assertEqual("{{foo|b=c}}", node5)
self.assertEqual("{{foo| a=|b=c}}", node6)
self.assertEqual("{{foo|1 =|2=c}}", node7)
self.assertEqual("{{foo|2=c}}", node8)
self.assertEqual("{{foo||c}}", node9)
self.assertEqual("{{foo||c}}", node10)
self.assertEqual("{{foo|2=c}}", node10)
self.assertEqual("{{foo|b=c|a =d}}", node11)
self.assertEqual("{{foo| a=|b=c|a =d}}", node12)
self.assertEqual("{{foo| a=b|a =d}}", node13)
@@ -410,7 +419,7 @@ class TestTemplate(TreeEqualityTestCase):
self.assertEqual("{{foo| a=b|b=c|a =}}", node16)
self.assertEqual("{{foo|b|c}}", node17)
self.assertEqual("{{foo|1 =|b|c}}", node18)
self.assertEqual("{{foo|1 =a||c}}", node19)
self.assertEqual("{{foo|1 =a|2=c}}", node19)
self.assertEqual("{{foo|1 =a||c}}", node20)
self.assertEqual("{{foo|c=d|e=f}}", node21)
self.assertEqual("{{foo|a=|c=d|e=f}}", node22)


+ 7
- 0
tests/tokenizer/integration.mwtest View File

@@ -244,6 +244,13 @@ output: [Text(text="{{foobar\n<!|key=value}}")]

---

name: newline_and_comment_in_template_name_8
label: a template name containing a newline followed by a comment
input: "{{<!-- comment -->\nfoobar\n<!-- comment -->}}"
output: [TemplateOpen(), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\nfoobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()]

---

name: tag_in_link_title
label: HTML tags are invalid in link titles, even when complete
input: "[[foo<i>bar</i>baz]]"


+ 37
- 14
tests/tokenizer/templates.mwtest View File

@@ -1,17 +1,3 @@
name: blank
label: template with no content
input: "{{}}"
output: [TemplateOpen(), TemplateClose()]


name: blank_with_params
label: template with no content, but pipes and equal signs
input: "{{||=|}}"
output: [TemplateOpen(), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()]


name: no_params
label: simplest type of template
input: "{{template}}"
@@ -61,6 +47,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="

---

name: blank_params
label: template with blank parameters (mix of pipes and equal signs)
input: "{{,||=|}}"
output: [TemplateOpen(), Text(text=","), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()]

---

name: nested_unnamed_param
label: nested template as an unnamed parameter
input: "{{foo|{{bar}}}}"
@@ -390,6 +383,34 @@ output: [TemplateOpen(), Text(text="foo\n "), TemplateParamSeparator(), Text(te

---

name: invalid_blank
label: invalid template with no content
input: "{{}}"
output: [Text(text="{{}}")]

---

name: invalid_blank_whitespace
label: invalid template with no content, but whitespace
input: "{{ }}"
output: [Text(text="{{ }}")]

---

name: invalid_blank_pipe
label: invalid template with no content, but a parameter
input: "{{|foo}}"
output: [Text(text="{{|foo}}")]

---

name: invalid_blank_whitespace_pipe
label: invalid template with no content, but whitespace and a parameter
input: "{{ |foo}}"
output: [Text(text="{{ |foo}}")]

---

name: invalid_name_left_brace_middle
label: invalid characters in template name: left brace in middle
input: "{{foo{bar}}"
@@ -665,5 +686,5 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{

name: recursion_opens_and_closes
label: test potentially dangerous recursion: template openings and closings
input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}"
output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")]
input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}"
output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")]

Loading…
Cancel
Save