diff --git a/.travis.yml b/.travis.yml index c09e793..7095d21 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,8 @@ python: - 3.2 - 3.3 - 3.4 - - nightly + - 3.5-dev +sudo: false install: - pip install coveralls - python setup.py build diff --git a/CHANGELOG b/CHANGELOG index 7ad2930..47d4331 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,11 @@ v0.4.1 (unreleased): - Added support for Python 3.5. - '<' and '>' are now disallowed in wikilink titles and template names. This includes when denoting tags, but not comments. +- Fixed the behavior of preserve_spacing in Template.add() and keep_field in + Template.remove() on parameters with hidden keys. +- Fixed parser bugs involving: + - templates with completely blank names; + - templates with newlines and comments. - Heavy refactoring and fixes to the C tokenizer. - Fixed some bugs in the release scripts. diff --git a/README.rst b/README.rst index 7e3e68d..c361a56 100644 --- a/README.rst +++ b/README.rst @@ -139,7 +139,7 @@ If you're not using a library, you can parse any page using the following code from urllib.parse import urlencode from urllib.request import urlopen import mwparserfromhell - API_URL = "http://en.wikipedia.org/w/api.php" + API_URL = "https://en.wikipedia.org/w/api.php" def parse(title): data = {"action": "query", "prop": "revisions", "rvlimit": 1, diff --git a/docs/changelog.rst b/docs/changelog.rst index 2944992..6b5e32d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -13,6 +13,13 @@ Unreleased - Added support for Python 3.5. - ``<`` and ``>`` are now disallowed in wikilink titles and template names. This includes when denoting tags, but not comments. +- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and + *keep_field* in :func:`~.Template.remove` on parameters with hidden keys. +- Fixed parser bugs involving: + + - templates with completely blank names; + - templates with newlines and comments. + - Heavy refactoring and fixes to the C tokenizer. - Fixed some bugs in the release scripts. diff --git a/docs/integration.rst b/docs/integration.rst index bbd00bb..af3abc9 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -25,7 +25,7 @@ If you're not using a library, you can parse any page using the following code from urllib.parse import urlencode from urllib.request import urlopen import mwparserfromhell - API_URL = "http://en.wikipedia.org/w/api.php" + API_URL = "https://en.wikipedia.org/w/api.php" def parse(title): data = {"action": "query", "prop": "revisions", "rvlimit": 1, diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 7cbeb7d..4ee5f5d 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -82,21 +82,11 @@ class Template(Node): if char in node: code.replace(node, node.replace(char, replacement), False) - def _blank_param_value(self, value): - """Remove the content from *value* while keeping its whitespace. - - Replace *value*\ 's nodes with two text nodes, the first containing - whitespace from before its content and the second containing whitespace - from after its content. - """ - match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS) - value.nodes = [Text(match.group(1)), Text(match.group(2))] - def _select_theory(self, theories): """Return the most likely spacing convention given different options. - Given a dictionary of convention options as keys and their occurrence as - values, return the convention that occurs the most, or ``None`` if + Given a dictionary of convention options as keys and their occurrence + as values, return the convention that occurs the most, or ``None`` if there is no clear preferred style. """ if theories: @@ -129,34 +119,47 @@ class Template(Node): after = self._select_theory(after_theories) return before, after - def _remove_with_field(self, param, i, name): - """Return True if a parameter name should be kept, otherwise False.""" - if param.showkey: - following = self.params[i+1:] - better_matches = [after.name.strip() == name and not after.showkey for after in following] - if any(better_matches): - return False - return True - - def _remove_without_field(self, param, i): - """Return False if a parameter name should be kept, otherwise True.""" - if not param.showkey: - dependents = [not after.showkey for after in self.params[i+1:]] - if any(dependents): - return False - return True + def _blank_param_value(self, value): + """Remove the content from *value* while keeping its whitespace. + + Replace *value*\ 's nodes with two text nodes, the first containing + whitespace from before its content and the second containing whitespace + from after its content. + """ + match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS) + value.nodes = [Text(match.group(1)), Text(match.group(2))] + + def _fix_dependendent_params(self, i): + """Unhide keys if necessary after removing the param at index *i*.""" + if not self.params[i].showkey: + for param in self.params[i + 1:]: + if not param.showkey: + param.showkey = True def _remove_exact(self, needle, keep_field): """Remove a specific parameter, *needle*, from the template.""" for i, param in enumerate(self.params): if param is needle: - if keep_field or not self._remove_without_field(param, i): + if keep_field: self._blank_param_value(param.value) else: + self._fix_dependendent_params(i) self.params.pop(i) return raise ValueError(needle) + def _should_remove(self, i, name): + """Look ahead for a parameter with the same name, but hidden. + + If one exists, we should remove the given one rather than blanking it. + """ + if self.params[i].showkey: + following = self.params[i + 1:] + better_matches = [after.name.strip() == name and not after.showkey + for after in following] + return any(better_matches) + return False + @property def name(self): """The name of the template, as a :class:`.Wikicode` object.""" @@ -213,26 +216,25 @@ class Template(Node): :func:`.utils.parse_anything`; pipes and equal signs are automatically escaped from *value* when appropriate. + If *name* is already a parameter in the template, we'll replace its + value. + If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent guess. - If *name* is already a parameter in the template, we'll replace its - value while keeping the same whitespace around it. We will also try to - guess the dominant spacing convention when adding a new parameter using - :meth:`_get_spacing_conventions`. - If *before* is given (either a :class:`.Parameter` object or a name), then we will place the parameter immediately before this one. Otherwise, it will be added at the end. If *before* is a name and exists multiple times in the template, we will place it before the last occurrence. If *before* is not in the template, :exc:`ValueError` is - raised. The argument is ignored if the new parameter already exists. + raised. The argument is ignored if *name* is an existing parameter. - If *preserve_spacing* is ``False``, we will avoid preserving spacing - conventions when changing the value of an existing parameter or when - adding a new one. + If *preserve_spacing* is ``True``, we will try to preserve whitespace + conventions around the parameter, whether it is new or we are updating + an existing value. It is disabled for parameters with hidden keys, + since MediaWiki doesn't strip whitespace in this case. """ name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") @@ -245,7 +247,7 @@ class Template(Node): if not existing.showkey: self._surface_escape(value, "=") nodes = existing.value.nodes - if preserve_spacing: + if preserve_spacing and existing.showkey: for i in range(2): # Ignore empty text nodes if not nodes[i]: nodes[i] = None @@ -271,7 +273,7 @@ class Template(Node): if not showkey: self._surface_escape(value, "=") - if preserve_spacing: + if preserve_spacing and showkey: before_n, after_n = self._get_spacing_conventions(use_names=True) before_v, after_v = self._get_spacing_conventions(use_names=False) name = parse_anything([before_n, name, after_n]) @@ -294,36 +296,39 @@ class Template(Node): and :meth:`get`. If *keep_field* is ``True``, we will keep the parameter's name, but - blank its value. Otherwise, we will remove the parameter completely - *unless* other parameters are dependent on it (e.g. removing ``bar`` - from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what - we expected, so ``{{foo||baz}}`` will be produced instead). + blank its value. Otherwise, we will remove the parameter completely. + + When removing a parameter with a hidden name, subsequent parameters + with hidden names will be made visible. For example, removing ``bar`` + from ``{{foo|bar|baz}}`` produces ``{{foo|2=baz}}`` because + ``{{foo|baz}}`` is incorrect. If the parameter shows up multiple times in the template and *param* is not a :class:`.Parameter` object, we will remove all instances of it - (and keep only one if *keep_field* is ``True`` - the first instance if - none have dependents, otherwise the one with dependents will be kept). + (and keep only one if *keep_field* is ``True`` - either the one with a + hidden name, if it exists, or the first instance). """ if isinstance(param, Parameter): return self._remove_exact(param, keep_field) + name = str(param).strip() removed = False to_remove = [] + for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: - if self._remove_with_field(param, i, name): - self._blank_param_value(param.value) - keep_field = False - else: - to_remove.append(i) - else: - if self._remove_without_field(param, i): + if self._should_remove(i, name): to_remove.append(i) else: self._blank_param_value(param.value) + keep_field = False + else: + self._fix_dependendent_params(i) + to_remove.append(i) if not removed: removed = True + if not removed: raise ValueError(name) for i in reversed(to_remove): diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index ae13c76..cbe58c5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -40,11 +40,11 @@ class ParserError(Exception): from .builder import Builder -from .tokenizer import Tokenizer try: from ._tokenizer import CTokenizer use_c = True except ImportError: + from .tokenizer import Tokenizer CTokenizer = None use_c = False @@ -70,6 +70,7 @@ class Parser(object): if use_c and CTokenizer: self._tokenizer = CTokenizer() else: + from .tokenizer import Tokenizer self._tokenizer = Tokenizer() self._builder = Builder() diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index e98d8f7..b676e86 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -89,6 +89,7 @@ Local (stack-specific) contexts: * :const:`FAIL_ON_LBRACE` * :const:`FAIL_ON_RBRACE` * :const:`FAIL_ON_EQUALS` + * :const:`HAS_TEMPLATE` * :const:`TABLE` @@ -161,15 +162,16 @@ FAIL_NEXT = 1 << 26 FAIL_ON_LBRACE = 1 << 27 FAIL_ON_RBRACE = 1 << 28 FAIL_ON_EQUALS = 1 << 29 +HAS_TEMPLATE = 1 << 30 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + - FAIL_ON_RBRACE + FAIL_ON_EQUALS) - -TABLE_OPEN = 1 << 30 -TABLE_CELL_OPEN = 1 << 31 -TABLE_CELL_STYLE = 1 << 32 -TABLE_ROW_OPEN = 1 << 33 -TABLE_TD_LINE = 1 << 34 -TABLE_TH_LINE = 1 << 35 + FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE) + +TABLE_OPEN = 1 << 31 +TABLE_CELL_OPEN = 1 << 32 +TABLE_CELL_STYLE = 1 << 33 +TABLE_ROW_OPEN = 1 << 34 +TABLE_TD_LINE = 1 << 35 +TABLE_TH_LINE = 1 << 36 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + TABLE_TD_LINE + TABLE_TH_LINE) diff --git a/mwparserfromhell/parser/ctokenizer/contexts.h b/mwparserfromhell/parser/ctokenizer/contexts.h index 8e24372..4e4a8c7 100644 --- a/mwparserfromhell/parser/ctokenizer/contexts.h +++ b/mwparserfromhell/parser/ctokenizer/contexts.h @@ -63,22 +63,23 @@ SOFTWARE. #define LC_DLTERM 0x0000000000800000 -#define LC_SAFETY_CHECK 0x000000003F000000 +#define LC_SAFETY_CHECK 0x000000007F000000 #define LC_HAS_TEXT 0x0000000001000000 #define LC_FAIL_ON_TEXT 0x0000000002000000 #define LC_FAIL_NEXT 0x0000000004000000 #define LC_FAIL_ON_LBRACE 0x0000000008000000 #define LC_FAIL_ON_RBRACE 0x0000000010000000 #define LC_FAIL_ON_EQUALS 0x0000000020000000 - -#define LC_TABLE 0x0000000FC0000000 -#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 -#define LC_TABLE_OPEN 0x0000000040000000 -#define LC_TABLE_CELL_OPEN 0x0000000080000000 -#define LC_TABLE_CELL_STYLE 0x0000000100000000 -#define LC_TABLE_ROW_OPEN 0x0000000200000000 -#define LC_TABLE_TD_LINE 0x0000000400000000 -#define LC_TABLE_TH_LINE 0x0000000800000000 +#define LC_HAS_TEMPLATE 0x0000000040000000 + +#define LC_TABLE 0x0000001F80000000 +#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000 +#define LC_TABLE_OPEN 0x0000000080000000 +#define LC_TABLE_CELL_OPEN 0x0000000100000000 +#define LC_TABLE_CELL_STYLE 0x0000000200000000 +#define LC_TABLE_ROW_OPEN 0x0000000400000000 +#define LC_TABLE_TD_LINE 0x0000000800000000 +#define LC_TABLE_TH_LINE 0x0000001000000000 /* Global contexts */ diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index 81d4bce..d761e27 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -121,12 +121,16 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr) /* Parse a template at the head of the wikicode string. */ -static int Tokenizer_parse_template(Tokenizer* self) +static int Tokenizer_parse_template(Tokenizer* self, int has_content) { PyObject *template; Py_ssize_t reset = self->head; + uint64_t context = LC_TEMPLATE_NAME; - template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1); + if (has_content) + context |= LC_HAS_TEMPLATE; + + template = Tokenizer_parse(self, context, 1); if (BAD_ROUTE) { self->head = reset; return 0; @@ -182,6 +186,7 @@ static int Tokenizer_parse_argument(Tokenizer* self) static int Tokenizer_parse_template_or_argument(Tokenizer* self) { unsigned int braces = 2, i; + int has_content = 0; PyObject *tokenlist; self->head += 2; @@ -198,7 +203,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) return 0; } if (braces == 2) { - if (Tokenizer_parse_template(self)) + if (Tokenizer_parse_template(self, has_content)) return -1; if (BAD_ROUTE) { RESET_ROUTE(); @@ -212,7 +217,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) return -1; if (BAD_ROUTE) { RESET_ROUTE(); - if (Tokenizer_parse_template(self)) + if (Tokenizer_parse_template(self, has_content)) return -1; if (BAD_ROUTE) { char text[MAX_BRACES + 1]; @@ -228,8 +233,10 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) } else braces -= 3; - if (braces) + if (braces) { + has_content = 1; self->head++; + } } tokenlist = Tokenizer_pop(self); if (!tokenlist) @@ -251,8 +258,13 @@ static int Tokenizer_handle_template_param(Tokenizer* self) { PyObject *stack; - if (self->topstack->context & LC_TEMPLATE_NAME) + if (self->topstack->context & LC_TEMPLATE_NAME) { + if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { + Tokenizer_fail_route(self); + return -1; + } self->topstack->context ^= LC_TEMPLATE_NAME; + } else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { @@ -303,7 +315,11 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) { PyObject* stack; - if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { + if (self->topstack->context & LC_TEMPLATE_NAME) { + if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) + return Tokenizer_fail_route(self); + } + else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { stack = Tokenizer_pop_keeping_context(self); if (!stack) return NULL; @@ -2428,30 +2444,26 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) if (context & LC_TAG_CLOSE) return (data == '<') ? -1 : 0; if (context & LC_TEMPLATE_NAME) { - if (data == '{' || data == '}' || data == '[') { + if (data == '{') { + self->topstack->context |= LC_HAS_TEMPLATE | LC_FAIL_NEXT; + return 0; + } + if (data == '}' || (data == '<' && Tokenizer_READ(self, 1) == '!')) { self->topstack->context |= LC_FAIL_NEXT; return 0; } - if (data == ']' || data == '>' || (data == '<' && - Tokenizer_READ(self, 1) != '!')) { + if (data == '[' || data == ']' || data == '<' || data == '>') { return -1; } if (data == '|') return 0; if (context & LC_HAS_TEXT) { if (context & LC_FAIL_ON_TEXT) { - if (!Py_UNICODE_ISSPACE(data)) { - if (data == '<' && Tokenizer_READ(self, 1) == '!') { - self->topstack->context |= LC_FAIL_NEXT; - return 0; - } + if (!Py_UNICODE_ISSPACE(data)) return -1; - } - } - else { - if (data == '\n') - self->topstack->context |= LC_FAIL_ON_TEXT; } + else if (data == '\n') + self->topstack->context |= LC_FAIL_ON_TEXT; } else if (!Py_UNICODE_ISSPACE(data)) self->topstack->context |= LC_HAS_TEXT; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 4d7d885..5c89455 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -192,11 +192,14 @@ class Tokenizer(object): self._fail_route() return self.END - def _parse_template(self): + def _parse_template(self, has_content): """Parse a template at the head of the wikicode string.""" reset = self._head + context = contexts.TEMPLATE_NAME + if has_content: + context |= contexts.HAS_TEMPLATE try: - template = self._parse(contexts.TEMPLATE_NAME) + template = self._parse(context) except BadRoute: self._head = reset raise @@ -223,6 +226,7 @@ class Tokenizer(object): while self._read() == "{": self._head += 1 braces += 1 + has_content = False self._push() while braces: @@ -230,7 +234,7 @@ class Tokenizer(object): return self._emit_text_then_stack("{") if braces == 2: try: - self._parse_template() + self._parse_template(has_content) except BadRoute: return self._emit_text_then_stack("{{") break @@ -239,11 +243,12 @@ class Tokenizer(object): braces -= 3 except BadRoute: try: - self._parse_template() + self._parse_template(has_content) braces -= 2 except BadRoute: return self._emit_text_then_stack("{" * braces) if braces: + has_content = True self._head += 1 self._emit_all(self._pop()) @@ -253,6 +258,8 @@ class Tokenizer(object): def _handle_template_param(self): """Handle a template parameter at the head of the string.""" if self._context & contexts.TEMPLATE_NAME: + if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): + self._fail_route() self._context ^= contexts.TEMPLATE_NAME elif self._context & contexts.TEMPLATE_PARAM_VALUE: self._context ^= contexts.TEMPLATE_PARAM_VALUE @@ -271,7 +278,10 @@ class Tokenizer(object): def _handle_template_end(self): """Handle the end of a template at the head of the string.""" - if self._context & contexts.TEMPLATE_PARAM_KEY: + if self._context & contexts.TEMPLATE_NAME: + if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE): + self._fail_route() + elif self._context & contexts.TEMPLATE_PARAM_KEY: self._emit_all(self._pop(keep_context=True)) self._head += 1 return self._pop() @@ -1183,23 +1193,22 @@ class Tokenizer(object): elif context & contexts.EXT_LINK_TITLE: return this != "\n" elif context & contexts.TEMPLATE_NAME: - if this == "{" or this == "}" or this == "[": + if this == "{": + self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT + return True + if this == "}" or (this == "<" and self._read(1) == "!"): self._context |= contexts.FAIL_NEXT return True - if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"): + if this == "[" or this == "]" or this == "<" or this == ">": return False if this == "|": return True if context & contexts.HAS_TEXT: if context & contexts.FAIL_ON_TEXT: if this is self.END or not this.isspace(): - if this == "<" and self._read(1) == "!": - self._context |= contexts.FAIL_NEXT - return True return False - else: - if this == "\n": - self._context |= contexts.FAIL_ON_TEXT + elif this == "\n": + self._context |= contexts.FAIL_ON_TEXT elif this is self.END or not this.isspace(): self._context |= contexts.HAS_TEXT return True diff --git a/tests/test_docs.py b/tests/test_docs.py index d50e90e..1c94130 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -115,8 +115,8 @@ class TestDocs(unittest.TestCase): @unittest.skipIf("NOWEB" in os.environ, "web test disabled by environ var") def test_readme_5(self): """test a block of example code in the README; includes a web call""" - url1 = "http://en.wikipedia.org/w/api.php" - url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" + url1 = "https://en.wikipedia.org/w/api.php" + url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw" title = "Test" data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} diff --git a/tests/test_template.py b/tests/test_template.py index 7ba3f64..e990818 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -213,6 +213,9 @@ class TestTemplate(TreeEqualityTestCase): pgens("f", "g")]) node37 = Template(wraptext("a"), [pgenh("1", "")]) node38 = Template(wraptext("abc")) + node39 = Template(wraptext("a"), [pgenh("1", " b ")]) + node40 = Template(wraptext("a"), [pgenh("1", " b"), pgenh("2", " c")]) + node41 = Template(wraptext("a"), [pgens("1", " b"), pgens("2", " c")]) node1.add("e", "f", showkey=True) node2.add(2, "g", showkey=False) @@ -255,6 +258,9 @@ class TestTemplate(TreeEqualityTestCase): node37.add(1, "b") node38.add("1", "foo") self.assertRaises(ValueError, node38.add, "z", "bar", showkey=False) + node39.add("1", "c") + node40.add("3", "d") + node41.add("3", "d") self.assertEqual("{{a|b=c|d|e=f}}", node1) self.assertEqual("{{a|b=c|d|g}}", node2) @@ -299,6 +305,9 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{a|b=c|d=h|f=g}}", node36) self.assertEqual("{{a|b}}", node37) self.assertEqual("{{abc|foo}}", node38) + self.assertEqual("{{a|c}}", node39) + self.assertEqual("{{a| b| c|d}}", node40) + self.assertEqual("{{a|1= b|2= c|3= d}}", node41) def test_remove(self): """test Template.remove()""" @@ -395,13 +404,13 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node2.remove, "1") self.assertEqual("{{foo}}", node2) self.assertEqual("{{foo||abc=}}", node3) - self.assertEqual("{{foo||baz}}", node4) + self.assertEqual("{{foo|2=baz}}", node4) self.assertEqual("{{foo|b=c}}", node5) self.assertEqual("{{foo| a=|b=c}}", node6) self.assertEqual("{{foo|1 =|2=c}}", node7) self.assertEqual("{{foo|2=c}}", node8) self.assertEqual("{{foo||c}}", node9) - self.assertEqual("{{foo||c}}", node10) + self.assertEqual("{{foo|2=c}}", node10) self.assertEqual("{{foo|b=c|a =d}}", node11) self.assertEqual("{{foo| a=|b=c|a =d}}", node12) self.assertEqual("{{foo| a=b|a =d}}", node13) @@ -410,7 +419,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{foo| a=b|b=c|a =}}", node16) self.assertEqual("{{foo|b|c}}", node17) self.assertEqual("{{foo|1 =|b|c}}", node18) - self.assertEqual("{{foo|1 =a||c}}", node19) + self.assertEqual("{{foo|1 =a|2=c}}", node19) self.assertEqual("{{foo|1 =a||c}}", node20) self.assertEqual("{{foo|c=d|e=f}}", node21) self.assertEqual("{{foo|a=|c=d|e=f}}", node22) diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 27a7d39..4d6b940 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -244,6 +244,13 @@ output: [Text(text="{{foobar\n\nfoobar\n}}" +output: [TemplateOpen(), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\nfoobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()] + +--- + name: tag_in_link_title label: HTML tags are invalid in link titles, even when complete input: "[[foobarbaz]]" diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index 8852703..1913f5d 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -1,17 +1,3 @@ -name: blank -label: template with no content -input: "{{}}" -output: [TemplateOpen(), TemplateClose()] - ---- - -name: blank_with_params -label: template with no content, but pipes and equal signs -input: "{{||=|}}" -output: [TemplateOpen(), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()] - ---- - name: no_params label: simplest type of template input: "{{template}}" @@ -61,6 +47,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" --- +name: blank_params +label: template with blank parameters (mix of pipes and equal signs) +input: "{{,||=|}}" +output: [TemplateOpen(), Text(text=","), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()] + +--- + name: nested_unnamed_param label: nested template as an unnamed parameter input: "{{foo|{{bar}}}}" @@ -390,6 +383,34 @@ output: [TemplateOpen(), Text(text="foo\n "), TemplateParamSeparator(), Text(te --- +name: invalid_blank +label: invalid template with no content +input: "{{}}" +output: [Text(text="{{}}")] + +--- + +name: invalid_blank_whitespace +label: invalid template with no content, but whitespace +input: "{{ }}" +output: [Text(text="{{ }}")] + +--- + +name: invalid_blank_pipe +label: invalid template with no content, but a parameter +input: "{{|foo}}" +output: [Text(text="{{|foo}}")] + +--- + +name: invalid_blank_whitespace_pipe +label: invalid template with no content, but whitespace and a parameter +input: "{{ |foo}}" +output: [Text(text="{{ |foo}}")] + +--- + name: invalid_name_left_brace_middle label: invalid characters in template name: left brace in middle input: "{{foo{bar}}" @@ -665,5 +686,5 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ name: recursion_opens_and_closes label: test potentially dangerous recursion: template openings and closings -input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" -output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] +input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}" +output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")]