@@ -5,7 +5,8 @@ python: | |||
- 3.2 | |||
- 3.3 | |||
- 3.4 | |||
- nightly | |||
- 3.5-dev | |||
sudo: false | |||
install: | |||
- pip install coveralls | |||
- python setup.py build | |||
@@ -6,6 +6,11 @@ v0.4.1 (unreleased): | |||
- Added support for Python 3.5. | |||
- '<' and '>' are now disallowed in wikilink titles and template names. This | |||
includes when denoting tags, but not comments. | |||
- Fixed the behavior of preserve_spacing in Template.add() and keep_field in | |||
Template.remove() on parameters with hidden keys. | |||
- Fixed parser bugs involving: | |||
- templates with completely blank names; | |||
- templates with newlines and comments. | |||
- Heavy refactoring and fixes to the C tokenizer. | |||
- Fixed some bugs in the release scripts. | |||
@@ -139,7 +139,7 @@ If you're not using a library, you can parse any page using the following code | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
import mwparserfromhell | |||
API_URL = "http://en.wikipedia.org/w/api.php" | |||
API_URL = "https://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
@@ -13,6 +13,13 @@ Unreleased | |||
- Added support for Python 3.5. | |||
- ``<`` and ``>`` are now disallowed in wikilink titles and template names. | |||
This includes when denoting tags, but not comments. | |||
- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and | |||
*keep_field* in :func:`~.Template.remove` on parameters with hidden keys. | |||
- Fixed parser bugs involving: | |||
- templates with completely blank names; | |||
- templates with newlines and comments. | |||
- Heavy refactoring and fixes to the C tokenizer. | |||
- Fixed some bugs in the release scripts. | |||
@@ -25,7 +25,7 @@ If you're not using a library, you can parse any page using the following code | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
import mwparserfromhell | |||
API_URL = "http://en.wikipedia.org/w/api.php" | |||
API_URL = "https://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
@@ -82,21 +82,11 @@ class Template(Node): | |||
if char in node: | |||
code.replace(node, node.replace(char, replacement), False) | |||
def _blank_param_value(self, value): | |||
"""Remove the content from *value* while keeping its whitespace. | |||
Replace *value*\ 's nodes with two text nodes, the first containing | |||
whitespace from before its content and the second containing whitespace | |||
from after its content. | |||
""" | |||
match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS) | |||
value.nodes = [Text(match.group(1)), Text(match.group(2))] | |||
def _select_theory(self, theories): | |||
"""Return the most likely spacing convention given different options. | |||
Given a dictionary of convention options as keys and their occurrence as | |||
values, return the convention that occurs the most, or ``None`` if | |||
Given a dictionary of convention options as keys and their occurrence | |||
as values, return the convention that occurs the most, or ``None`` if | |||
there is no clear preferred style. | |||
""" | |||
if theories: | |||
@@ -129,34 +119,47 @@ class Template(Node): | |||
after = self._select_theory(after_theories) | |||
return before, after | |||
def _remove_with_field(self, param, i, name): | |||
"""Return True if a parameter name should be kept, otherwise False.""" | |||
if param.showkey: | |||
following = self.params[i+1:] | |||
better_matches = [after.name.strip() == name and not after.showkey for after in following] | |||
if any(better_matches): | |||
return False | |||
return True | |||
def _remove_without_field(self, param, i): | |||
"""Return False if a parameter name should be kept, otherwise True.""" | |||
if not param.showkey: | |||
dependents = [not after.showkey for after in self.params[i+1:]] | |||
if any(dependents): | |||
return False | |||
return True | |||
def _blank_param_value(self, value):
    """Empty *value* of its content, leaving only the surrounding whitespace.

    The nodes of *value* are replaced by exactly two text nodes: the first
    holds whatever whitespace preceded the content, the second holds
    whatever whitespace followed it.
    """
    # The two capture groups are the leading and trailing whitespace runs;
    # the lazily matched middle section (the actual content) is discarded.
    # NOTE(review): FLAGS is defined elsewhere in the module; presumably it
    # lets "." span newlines so that a match is always found -- confirm.
    leading, trailing = re.search(
        r"^(\s*).*?(\s*)$", str(value), FLAGS).groups()
    value.nodes = [Text(leading), Text(trailing)]
def _fix_dependendent_params(self, i):
    """Make later hidden keys visible when the param at index *i* goes away.

    Nothing is changed when the parameter at *i* shows its key. When its key
    is hidden, every subsequent parameter whose key is also hidden has its
    ``showkey`` attribute switched on.
    """
    if self.params[i].showkey:
        # A parameter with a visible key has no dependents to repair.
        return
    for later in self.params[i + 1:]:
        if not later.showkey:
            later.showkey = True
def _remove_exact(self, needle, keep_field): | |||
"""Remove a specific parameter, *needle*, from the template.""" | |||
for i, param in enumerate(self.params): | |||
if param is needle: | |||
if keep_field or not self._remove_without_field(param, i): | |||
if keep_field: | |||
self._blank_param_value(param.value) | |||
else: | |||
self._fix_dependendent_params(i) | |||
self.params.pop(i) | |||
return | |||
raise ValueError(needle) | |||
def _should_remove(self, i, name):
    """Look ahead for a parameter with the same name, but hidden.

    If one exists, we should remove the given one rather than blanking it.

    *i* is the index into ``self.params`` of the parameter being examined
    and *name* is the name to compare against (each later parameter's name
    is stripped of surrounding whitespace before the comparison).

    Return ``True`` only when the parameter at *i* shows its key and at
    least one later parameter has the same name with its key hidden;
    otherwise return ``False``.
    """
    if not self.params[i].showkey:
        return False
    following = self.params[i + 1:]
    # A generator lets any() stop at the first better match instead of
    # evaluating every remaining parameter up front.
    return any(after.name.strip() == name and not after.showkey
               for after in following)
@property | |||
def name(self): | |||
"""The name of the template, as a :class:`.Wikicode` object.""" | |||
@@ -213,26 +216,25 @@ class Template(Node): | |||
:func:`.utils.parse_anything`; pipes and equal signs are automatically | |||
escaped from *value* when appropriate. | |||
If *name* is already a parameter in the template, we'll replace its | |||
value. | |||
If *showkey* is given, this will determine whether or not to show the | |||
parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of | |||
``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent | |||
guess. | |||
If *name* is already a parameter in the template, we'll replace its | |||
value while keeping the same whitespace around it. We will also try to | |||
guess the dominant spacing convention when adding a new parameter using | |||
:meth:`_get_spacing_conventions`. | |||
If *before* is given (either a :class:`.Parameter` object or a name), | |||
then we will place the parameter immediately before this one. | |||
Otherwise, it will be added at the end. If *before* is a name and | |||
exists multiple times in the template, we will place it before the last | |||
occurrence. If *before* is not in the template, :exc:`ValueError` is | |||
raised. The argument is ignored if the new parameter already exists. | |||
raised. The argument is ignored if *name* is an existing parameter. | |||
If *preserve_spacing* is ``False``, we will avoid preserving spacing | |||
conventions when changing the value of an existing parameter or when | |||
adding a new one. | |||
If *preserve_spacing* is ``True``, we will try to preserve whitespace | |||
conventions around the parameter, whether it is new or we are updating | |||
an existing value. It is disabled for parameters with hidden keys, | |||
since MediaWiki doesn't strip whitespace in this case. | |||
""" | |||
name, value = parse_anything(name), parse_anything(value) | |||
self._surface_escape(value, "|") | |||
@@ -245,7 +247,7 @@ class Template(Node): | |||
if not existing.showkey: | |||
self._surface_escape(value, "=") | |||
nodes = existing.value.nodes | |||
if preserve_spacing: | |||
if preserve_spacing and existing.showkey: | |||
for i in range(2): # Ignore empty text nodes | |||
if not nodes[i]: | |||
nodes[i] = None | |||
@@ -271,7 +273,7 @@ class Template(Node): | |||
if not showkey: | |||
self._surface_escape(value, "=") | |||
if preserve_spacing: | |||
if preserve_spacing and showkey: | |||
before_n, after_n = self._get_spacing_conventions(use_names=True) | |||
before_v, after_v = self._get_spacing_conventions(use_names=False) | |||
name = parse_anything([before_n, name, after_n]) | |||
@@ -294,36 +296,39 @@ class Template(Node): | |||
and :meth:`get`. | |||
If *keep_field* is ``True``, we will keep the parameter's name, but | |||
blank its value. Otherwise, we will remove the parameter completely | |||
*unless* other parameters are dependent on it (e.g. removing ``bar`` | |||
from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what | |||
we expected, so ``{{foo||baz}}`` will be produced instead). | |||
blank its value. Otherwise, we will remove the parameter completely. | |||
When removing a parameter with a hidden name, subsequent parameters | |||
with hidden names will be made visible. For example, removing ``bar`` | |||
from ``{{foo|bar|baz}}`` produces ``{{foo|2=baz}}`` because | |||
``{{foo|baz}}`` is incorrect. | |||
If the parameter shows up multiple times in the template and *param* is | |||
not a :class:`.Parameter` object, we will remove all instances of it | |||
(and keep only one if *keep_field* is ``True`` - the first instance if | |||
none have dependents, otherwise the one with dependents will be kept). | |||
(and keep only one if *keep_field* is ``True`` - either the one with a | |||
hidden name, if it exists, or the first instance). | |||
""" | |||
if isinstance(param, Parameter): | |||
return self._remove_exact(param, keep_field) | |||
name = str(param).strip() | |||
removed = False | |||
to_remove = [] | |||
for i, param in enumerate(self.params): | |||
if param.name.strip() == name: | |||
if keep_field: | |||
if self._remove_with_field(param, i, name): | |||
self._blank_param_value(param.value) | |||
keep_field = False | |||
else: | |||
to_remove.append(i) | |||
else: | |||
if self._remove_without_field(param, i): | |||
if self._should_remove(i, name): | |||
to_remove.append(i) | |||
else: | |||
self._blank_param_value(param.value) | |||
keep_field = False | |||
else: | |||
self._fix_dependendent_params(i) | |||
to_remove.append(i) | |||
if not removed: | |||
removed = True | |||
if not removed: | |||
raise ValueError(name) | |||
for i in reversed(to_remove): | |||
@@ -40,11 +40,11 @@ class ParserError(Exception): | |||
from .builder import Builder | |||
from .tokenizer import Tokenizer | |||
try: | |||
from ._tokenizer import CTokenizer | |||
use_c = True | |||
except ImportError: | |||
from .tokenizer import Tokenizer | |||
CTokenizer = None | |||
use_c = False | |||
@@ -70,6 +70,7 @@ class Parser(object): | |||
if use_c and CTokenizer: | |||
self._tokenizer = CTokenizer() | |||
else: | |||
from .tokenizer import Tokenizer | |||
self._tokenizer = Tokenizer() | |||
self._builder = Builder() | |||
@@ -89,6 +89,7 @@ Local (stack-specific) contexts: | |||
* :const:`FAIL_ON_LBRACE` | |||
* :const:`FAIL_ON_RBRACE` | |||
* :const:`FAIL_ON_EQUALS` | |||
* :const:`HAS_TEMPLATE` | |||
* :const:`TABLE` | |||
@@ -161,15 +162,16 @@ FAIL_NEXT = 1 << 26 | |||
FAIL_ON_LBRACE = 1 << 27 | |||
FAIL_ON_RBRACE = 1 << 28 | |||
FAIL_ON_EQUALS = 1 << 29 | |||
HAS_TEMPLATE = 1 << 30 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | |||
TABLE_OPEN = 1 << 30 | |||
TABLE_CELL_OPEN = 1 << 31 | |||
TABLE_CELL_STYLE = 1 << 32 | |||
TABLE_ROW_OPEN = 1 << 33 | |||
TABLE_TD_LINE = 1 << 34 | |||
TABLE_TH_LINE = 1 << 35 | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE) | |||
TABLE_OPEN = 1 << 31 | |||
TABLE_CELL_OPEN = 1 << 32 | |||
TABLE_CELL_STYLE = 1 << 33 | |||
TABLE_ROW_OPEN = 1 << 34 | |||
TABLE_TD_LINE = 1 << 35 | |||
TABLE_TH_LINE = 1 << 36 | |||
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE | |||
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + | |||
TABLE_TD_LINE + TABLE_TH_LINE) | |||
@@ -63,22 +63,23 @@ SOFTWARE. | |||
#define LC_DLTERM 0x0000000000800000 | |||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||
#define LC_SAFETY_CHECK 0x000000007F000000 | |||
#define LC_HAS_TEXT 0x0000000001000000 | |||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||
#define LC_FAIL_NEXT 0x0000000004000000 | |||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||
#define LC_TABLE 0x0000000FC0000000 | |||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||
#define LC_TABLE_OPEN 0x0000000040000000 | |||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||
#define LC_HAS_TEMPLATE 0x0000000040000000 | |||
#define LC_TABLE 0x0000001F80000000 | |||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 | |||
#define LC_TABLE_OPEN 0x0000000080000000 | |||
#define LC_TABLE_CELL_OPEN 0x0000000100000000 | |||
#define LC_TABLE_CELL_STYLE 0x0000000200000000 | |||
#define LC_TABLE_ROW_OPEN 0x0000000400000000 | |||
#define LC_TABLE_TD_LINE 0x0000000800000000 | |||
#define LC_TABLE_TH_LINE 0x0000001000000000 | |||
/* Global contexts */ | |||
@@ -121,12 +121,16 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr) | |||
/* | |||
Parse a template at the head of the wikicode string. | |||
*/ | |||
static int Tokenizer_parse_template(Tokenizer* self) | |||
static int Tokenizer_parse_template(Tokenizer* self, int has_content) | |||
{ | |||
PyObject *template; | |||
Py_ssize_t reset = self->head; | |||
uint64_t context = LC_TEMPLATE_NAME; | |||
template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); | |||
if (has_content) | |||
context |= LC_HAS_TEMPLATE; | |||
template = Tokenizer_parse(self, context, 1); | |||
if (BAD_ROUTE) { | |||
self->head = reset; | |||
return 0; | |||
@@ -182,6 +186,7 @@ static int Tokenizer_parse_argument(Tokenizer* self) | |||
static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
{ | |||
unsigned int braces = 2, i; | |||
int has_content = 0; | |||
PyObject *tokenlist; | |||
self->head += 2; | |||
@@ -198,7 +203,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
return 0; | |||
} | |||
if (braces == 2) { | |||
if (Tokenizer_parse_template(self)) | |||
if (Tokenizer_parse_template(self, has_content)) | |||
return -1; | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
@@ -212,7 +217,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
return -1; | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
if (Tokenizer_parse_template(self)) | |||
if (Tokenizer_parse_template(self, has_content)) | |||
return -1; | |||
if (BAD_ROUTE) { | |||
char text[MAX_BRACES + 1]; | |||
@@ -228,8 +233,10 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||
} | |||
else | |||
braces -= 3; | |||
if (braces) | |||
if (braces) { | |||
has_content = 1; | |||
self->head++; | |||
} | |||
} | |||
tokenlist = Tokenizer_pop(self); | |||
if (!tokenlist) | |||
@@ -251,8 +258,13 @@ static int Tokenizer_handle_template_param(Tokenizer* self) | |||
{ | |||
PyObject *stack; | |||
if (self->topstack->context & LC_TEMPLATE_NAME) | |||
if (self->topstack->context & LC_TEMPLATE_NAME) { | |||
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { | |||
Tokenizer_fail_route(self); | |||
return -1; | |||
} | |||
self->topstack->context ^= LC_TEMPLATE_NAME; | |||
} | |||
else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) | |||
self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; | |||
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||
@@ -303,7 +315,11 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) | |||
{ | |||
PyObject* stack; | |||
if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||
if (self->topstack->context & LC_TEMPLATE_NAME) { | |||
if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) | |||
return Tokenizer_fail_route(self); | |||
} | |||
else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { | |||
stack = Tokenizer_pop_keeping_context(self); | |||
if (!stack) | |||
return NULL; | |||
@@ -2428,30 +2444,26 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) | |||
if (context & LC_TAG_CLOSE) | |||
return (data == '<') ? -1 : 0; | |||
if (context & LC_TEMPLATE_NAME) { | |||
if (data == '{' || data == '}' || data == '[') { | |||
if (data == '{') { | |||
self->topstack->context |= LC_HAS_TEMPLATE | LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (data == '}' || (data == '<' && Tokenizer_READ(self, 1) == '!')) { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (data == ']' || data == '>' || (data == '<' && | |||
Tokenizer_READ(self, 1) != '!')) { | |||
if (data == '[' || data == ']' || data == '<' || data == '>') { | |||
return -1; | |||
} | |||
if (data == '|') | |||
return 0; | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
if (!Py_UNICODE_ISSPACE(data)) { | |||
if (data == '<' && Tokenizer_READ(self, 1) == '!') { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return 0; | |||
} | |||
if (!Py_UNICODE_ISSPACE(data)) | |||
return -1; | |||
} | |||
} | |||
else { | |||
if (data == '\n') | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
else if (data == '\n') | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
else if (!Py_UNICODE_ISSPACE(data)) | |||
self->topstack->context |= LC_HAS_TEXT; | |||
@@ -192,11 +192,14 @@ class Tokenizer(object): | |||
self._fail_route() | |||
return self.END | |||
def _parse_template(self): | |||
def _parse_template(self, has_content): | |||
"""Parse a template at the head of the wikicode string.""" | |||
reset = self._head | |||
context = contexts.TEMPLATE_NAME | |||
if has_content: | |||
context |= contexts.HAS_TEMPLATE | |||
try: | |||
template = self._parse(contexts.TEMPLATE_NAME) | |||
template = self._parse(context) | |||
except BadRoute: | |||
self._head = reset | |||
raise | |||
@@ -223,6 +226,7 @@ class Tokenizer(object): | |||
while self._read() == "{": | |||
self._head += 1 | |||
braces += 1 | |||
has_content = False | |||
self._push() | |||
while braces: | |||
@@ -230,7 +234,7 @@ class Tokenizer(object): | |||
return self._emit_text_then_stack("{") | |||
if braces == 2: | |||
try: | |||
self._parse_template() | |||
self._parse_template(has_content) | |||
except BadRoute: | |||
return self._emit_text_then_stack("{{") | |||
break | |||
@@ -239,11 +243,12 @@ class Tokenizer(object): | |||
braces -= 3 | |||
except BadRoute: | |||
try: | |||
self._parse_template() | |||
self._parse_template(has_content) | |||
braces -= 2 | |||
except BadRoute: | |||
return self._emit_text_then_stack("{" * braces) | |||
if braces: | |||
has_content = True | |||
self._head += 1 | |||
self._emit_all(self._pop()) | |||
@@ -253,6 +258,8 @@ class Tokenizer(object): | |||
def _handle_template_param(self): | |||
"""Handle a template parameter at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): | |||
self._fail_route() | |||
self._context ^= contexts.TEMPLATE_NAME | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
@@ -271,7 +278,10 @@ class Tokenizer(object): | |||
def _handle_template_end(self): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if self._context & contexts.TEMPLATE_NAME: | |||
if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): | |||
self._fail_route() | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._emit_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -1183,23 +1193,22 @@ class Tokenizer(object): | |||
elif context & contexts.EXT_LINK_TITLE: | |||
return this != "\n" | |||
elif context & contexts.TEMPLATE_NAME: | |||
if this == "{" or this == "}" or this == "[": | |||
if this == "{": | |||
self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT | |||
return True | |||
if this == "}" or (this == "<" and self._read(1) == "!"): | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"): | |||
if this == "[" or this == "]" or this == "<" or this == ">": | |||
return False | |||
if this == "|": | |||
return True | |||
if context & contexts.HAS_TEXT: | |||
if context & contexts.FAIL_ON_TEXT: | |||
if this is self.END or not this.isspace(): | |||
if this == "<" and self._read(1) == "!": | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
return False | |||
else: | |||
if this == "\n": | |||
self._context |= contexts.FAIL_ON_TEXT | |||
elif this == "\n": | |||
self._context |= contexts.FAIL_ON_TEXT | |||
elif this is self.END or not this.isspace(): | |||
self._context |= contexts.HAS_TEXT | |||
return True | |||
@@ -115,8 +115,8 @@ class TestDocs(unittest.TestCase): | |||
@unittest.skipIf("NOWEB" in os.environ, "web test disabled by environ var") | |||
def test_readme_5(self): | |||
"""test a block of example code in the README; includes a web call""" | |||
url1 = "http://en.wikipedia.org/w/api.php" | |||
url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||
url1 = "https://en.wikipedia.org/w/api.php" | |||
url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||
title = "Test" | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
@@ -213,6 +213,9 @@ class TestTemplate(TreeEqualityTestCase): | |||
pgens("f", "g")]) | |||
node37 = Template(wraptext("a"), [pgenh("1", "")]) | |||
node38 = Template(wraptext("abc")) | |||
node39 = Template(wraptext("a"), [pgenh("1", " b ")]) | |||
node40 = Template(wraptext("a"), [pgenh("1", " b"), pgenh("2", " c")]) | |||
node41 = Template(wraptext("a"), [pgens("1", " b"), pgens("2", " c")]) | |||
node1.add("e", "f", showkey=True) | |||
node2.add(2, "g", showkey=False) | |||
@@ -255,6 +258,9 @@ class TestTemplate(TreeEqualityTestCase): | |||
node37.add(1, "b") | |||
node38.add("1", "foo") | |||
self.assertRaises(ValueError, node38.add, "z", "bar", showkey=False) | |||
node39.add("1", "c") | |||
node40.add("3", "d") | |||
node41.add("3", "d") | |||
self.assertEqual("{{a|b=c|d|e=f}}", node1) | |||
self.assertEqual("{{a|b=c|d|g}}", node2) | |||
@@ -299,6 +305,9 @@ class TestTemplate(TreeEqualityTestCase): | |||
self.assertEqual("{{a|b=c|d=h|f=g}}", node36) | |||
self.assertEqual("{{a|b}}", node37) | |||
self.assertEqual("{{abc|foo}}", node38) | |||
self.assertEqual("{{a|c}}", node39) | |||
self.assertEqual("{{a| b| c|d}}", node40) | |||
self.assertEqual("{{a|1= b|2= c|3= d}}", node41) | |||
def test_remove(self): | |||
"""test Template.remove()""" | |||
@@ -395,13 +404,13 @@ class TestTemplate(TreeEqualityTestCase): | |||
self.assertRaises(ValueError, node2.remove, "1") | |||
self.assertEqual("{{foo}}", node2) | |||
self.assertEqual("{{foo||abc=}}", node3) | |||
self.assertEqual("{{foo||baz}}", node4) | |||
self.assertEqual("{{foo|2=baz}}", node4) | |||
self.assertEqual("{{foo|b=c}}", node5) | |||
self.assertEqual("{{foo| a=|b=c}}", node6) | |||
self.assertEqual("{{foo|1 =|2=c}}", node7) | |||
self.assertEqual("{{foo|2=c}}", node8) | |||
self.assertEqual("{{foo||c}}", node9) | |||
self.assertEqual("{{foo||c}}", node10) | |||
self.assertEqual("{{foo|2=c}}", node10) | |||
self.assertEqual("{{foo|b=c|a =d}}", node11) | |||
self.assertEqual("{{foo| a=|b=c|a =d}}", node12) | |||
self.assertEqual("{{foo| a=b|a =d}}", node13) | |||
@@ -410,7 +419,7 @@ class TestTemplate(TreeEqualityTestCase): | |||
self.assertEqual("{{foo| a=b|b=c|a =}}", node16) | |||
self.assertEqual("{{foo|b|c}}", node17) | |||
self.assertEqual("{{foo|1 =|b|c}}", node18) | |||
self.assertEqual("{{foo|1 =a||c}}", node19) | |||
self.assertEqual("{{foo|1 =a|2=c}}", node19) | |||
self.assertEqual("{{foo|1 =a||c}}", node20) | |||
self.assertEqual("{{foo|c=d|e=f}}", node21) | |||
self.assertEqual("{{foo|a=|c=d|e=f}}", node22) | |||
@@ -244,6 +244,13 @@ output: [Text(text="{{foobar\n<!|key=value}}")] | |||
--- | |||
name: newline_and_comment_in_template_name_8 | |||
label: a template name containing a newline followed by a comment | |||
input: "{{<!-- comment -->\nfoobar\n<!-- comment -->}}" | |||
output: [TemplateOpen(), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\nfoobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()] | |||
--- | |||
name: tag_in_link_title | |||
label: HTML tags are invalid in link titles, even when complete | |||
input: "[[foo<i>bar</i>baz]]" | |||
@@ -1,17 +1,3 @@ | |||
name: blank | |||
label: template with no content | |||
input: "{{}}" | |||
output: [TemplateOpen(), TemplateClose()] | |||
name: blank_with_params | |||
label: template with no content, but pipes and equal signs | |||
input: "{{||=|}}" | |||
output: [TemplateOpen(), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()] | |||
name: no_params | |||
label: simplest type of template | |||
input: "{{template}}" | |||
@@ -61,6 +47,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" | |||
--- | |||
name: blank_params | |||
label: template with blank parameters (mix of pipes and equal signs) | |||
input: "{{,||=|}}" | |||
output: [TemplateOpen(), Text(text=","), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()] | |||
--- | |||
name: nested_unnamed_param | |||
label: nested template as an unnamed parameter | |||
input: "{{foo|{{bar}}}}" | |||
@@ -390,6 +383,34 @@ output: [TemplateOpen(), Text(text="foo\n "), TemplateParamSeparator(), Text(te | |||
--- | |||
name: invalid_blank | |||
label: invalid template with no content | |||
input: "{{}}" | |||
output: [Text(text="{{}}")] | |||
--- | |||
name: invalid_blank_whitespace | |||
label: invalid template with no content, but whitespace | |||
input: "{{ }}" | |||
output: [Text(text="{{ }}")] | |||
--- | |||
name: invalid_blank_pipe | |||
label: invalid template with no content, but a parameter | |||
input: "{{|foo}}" | |||
output: [Text(text="{{|foo}}")] | |||
--- | |||
name: invalid_blank_whitespace_pipe | |||
label: invalid template with no content, but whitespace and a parameter | |||
input: "{{ |foo}}" | |||
output: [Text(text="{{ |foo}}")] | |||
--- | |||
name: invalid_name_left_brace_middle | |||
label: invalid characters in template name: left brace in middle | |||
input: "{{foo{bar}}" | |||
@@ -665,5 +686,5 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ | |||
name: recursion_opens_and_closes | |||
label: test potentially dangerous recursion: template openings and closings | |||
input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" | |||
output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] | |||
input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}" | |||
output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")] |