Merge branch 'develop' into feature/c_refactor

9 years ago · 8b72c783f0
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,8 @@ python:
    - 3.2
    - 3.3
    - 3.4
    - nightly
    - 3.5-dev
 sudo: false
 install:
    - pip install coveralls
    - python setup.py build
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,11 @@ v0.4.1 (unreleased):
 - Added support for Python 3.5.
 - '<' and '>' are now disallowed in wikilink titles and template names. This
  includes when denoting tags, but not comments.
 - Fixed the behavior of preserve_spacing in Template.add() and keep_field in
  Template.remove() on parameters with hidden keys.
 - Fixed parser bugs involving:
  - templates with completely blank names;
  - templates with newlines and comments.
 - Heavy refactoring and fixes to the C tokenizer.
 - Fixed some bugs in the release scripts.

--- a/README.rst
+++ b/README.rst
@@ -139,7 +139,7 @@ If you're not using a library, you can parse any page using the following code
    from urllib.parse import urlencode
    from urllib.request import urlopen
    import mwparserfromhell
    API_URL = "http://en.wikipedia.org/w/api.php"
    API_URL = "https://en.wikipedia.org/w/api.php"

    def parse(title):
        data = {"action": "query", "prop": "revisions", "rvlimit": 1,
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -13,6 +13,13 @@ Unreleased
 - Added support for Python 3.5.
 - ``<`` and ``>`` are now disallowed in wikilink titles and template names.
  This includes when denoting tags, but not comments.
 - Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and
  *keep_field* in :func:`~.Template.remove` on parameters with hidden keys.
 - Fixed parser bugs involving:

  - templates with completely blank names;
  - templates with newlines and comments.

 - Heavy refactoring and fixes to the C tokenizer.
 - Fixed some bugs in the release scripts.

--- a/docs/integration.rst
+++ b/docs/integration.rst
@@ -25,7 +25,7 @@ If you're not using a library, you can parse any page using the following code
    from urllib.parse import urlencode
    from urllib.request import urlopen
    import mwparserfromhell
    API_URL = "http://en.wikipedia.org/w/api.php"
    API_URL = "https://en.wikipedia.org/w/api.php"

    def parse(title):
        data = {"action": "query", "prop": "revisions", "rvlimit": 1,
--- a/mwparserfromhell/nodes/template.py
+++ b/mwparserfromhell/nodes/template.py
@@ -82,21 +82,11 @@ class Template(Node):
            if char in node:
                code.replace(node, node.replace(char, replacement), False)

    def _blank_param_value(self, value):
        """Remove the content from *value* while keeping its whitespace.

        Replace the nodes of *value* with two text nodes: the first holds the
        whitespace found before its content and the second holds the
        whitespace found after its content.

        *value* is modified in place (its ``nodes`` attribute is reassigned);
        nothing is returned.
        """
        # Group 1 captures the leading whitespace and group 2 the trailing
        # whitespace; the lazy ``.*?`` in between swallows the content that
        # is being discarded.
        # NOTE(review): this assumes FLAGS lets "." span newlines so that the
        # search always succeeds on multi-line values -- confirm where FLAGS
        # is defined.
        match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
        value.nodes = [Text(match.group(1)), Text(match.group(2))]

    def _select_theory(self, theories):
        """Return the most likely spacing convention given different options.

        Given a dictionary of convention options as keys and their occurrence as
        values, return the convention that occurs the most, or ``None`` if
        Given a dictionary of convention options as keys and their occurrence
        as values, return the convention that occurs the most, or ``None`` if
        there is no clear preferred style.
        """
        if theories:
@@ -129,34 +119,47 @@ class Template(Node):
        after = self._select_theory(after_theories)
        return before, after

    def _remove_with_field(self, param, i, name):
        """Return True if a parameter name should be kept, otherwise False."""
        if param.showkey:
            following = self.params[i+1:]
            better_matches = [after.name.strip() == name and not after.showkey for after in following]
            if any(better_matches):
                return False
        return True

    def _remove_without_field(self, param, i):
        """Return False if a parameter name should be kept, otherwise True."""
        if not param.showkey:
            dependents = [not after.showkey for after in self.params[i+1:]]
            if any(dependents):
                return False
        return True
    def _blank_param_value(self, value):
        """Remove the content from *value* while keeping its whitespace.

        Replace the nodes of *value* with two text nodes: the first holds the
        whitespace found before its content and the second holds the
        whitespace found after its content.

        *value* is modified in place (its ``nodes`` attribute is reassigned);
        nothing is returned.
        """
        # Group 1 captures the leading whitespace and group 2 the trailing
        # whitespace; the lazy ``.*?`` in between swallows the content that
        # is being discarded.
        # NOTE(review): this assumes FLAGS lets "." span newlines so that the
        # search always succeeds on multi-line values -- confirm where FLAGS
        # is defined.
        match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
        value.nodes = [Text(match.group(1)), Text(match.group(2))]

    def _fix_dependendent_params(self, i):
        """Unhide keys if necessary after removing the param at index *i*."""
        if not self.params[i].showkey:
            for param in self.params[i + 1:]:
                if not param.showkey:
                    param.showkey = True

    def _remove_exact(self, needle, keep_field):
        """Remove a specific parameter, *needle*, from the template."""
        for i, param in enumerate(self.params):
            if param is needle:
                if keep_field or not self._remove_without_field(param, i):
                if keep_field:
                    self._blank_param_value(param.value)
                else:
                    self._fix_dependendent_params(i)
                    self.params.pop(i)
                return
        raise ValueError(needle)

    def _should_remove(self, i, name):
        """Look ahead for a parameter with the same name, but hidden.

        If one exists, we should remove the given one rather than blanking it.
        """
        if self.params[i].showkey:
            following = self.params[i + 1:]
            better_matches = [after.name.strip() == name and not after.showkey
                              for after in following]
            return any(better_matches)
        return False

    @property
    def name(self):
        """The name of the template, as a :class:`.Wikicode` object."""
@@ -213,26 +216,25 @@ class Template(Node):
        :func:`.utils.parse_anything`; pipes and equal signs are automatically
        escaped from *value* when appropriate.

        If *name* is already a parameter in the template, we'll replace its
        value.

        If *showkey* is given, this will determine whether or not to show the
        parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of
        ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent
        guess.

        If *name* is already a parameter in the template, we'll replace its
        value while keeping the same whitespace around it. We will also try to
        guess the dominant spacing convention when adding a new parameter using
        :meth:`_get_spacing_conventions`.

        If *before* is given (either a :class:`.Parameter` object or a name),
        then we will place the parameter immediately before this one.
        Otherwise, it will be added at the end. If *before* is a name and
        exists multiple times in the template, we will place it before the last
        occurrence. If *before* is not in the template, :exc:`ValueError` is
        raised. The argument is ignored if the new parameter already exists.
        raised. The argument is ignored if *name* is an existing parameter.

        If *preserve_spacing* is ``False``, we will avoid preserving spacing
        conventions when changing the value of an existing parameter or when
        adding a new one.
        If *preserve_spacing* is ``True``, we will try to preserve whitespace
        conventions around the parameter, whether it is new or we are updating
        an existing value. It is disabled for parameters with hidden keys,
        since MediaWiki doesn't strip whitespace in this case.
        """
        name, value = parse_anything(name), parse_anything(value)
        self._surface_escape(value, "|")
@@ -245,7 +247,7 @@ class Template(Node):
            if not existing.showkey:
                self._surface_escape(value, "=")
            nodes = existing.value.nodes
            if preserve_spacing:
            if preserve_spacing and existing.showkey:
                for i in range(2):  # Ignore empty text nodes
                    if not nodes[i]:
                        nodes[i] = None
@@ -271,7 +273,7 @@ class Template(Node):
        if not showkey:
            self._surface_escape(value, "=")

        if preserve_spacing:
        if preserve_spacing and showkey:
            before_n, after_n = self._get_spacing_conventions(use_names=True)
            before_v, after_v = self._get_spacing_conventions(use_names=False)
            name = parse_anything([before_n, name, after_n])
@@ -294,36 +296,39 @@ class Template(Node):
        and :meth:`get`.

        If *keep_field* is ``True``, we will keep the parameter's name, but
        blank its value. Otherwise, we will remove the parameter completely
        *unless* other parameters are dependent on it (e.g. removing ``bar``
        from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what
        we expected, so ``{{foo||baz}}`` will be produced instead).
        blank its value. Otherwise, we will remove the parameter completely.

        When removing a parameter with a hidden name, subsequent parameters
        with hidden names will be made visible. For example, removing ``bar``
        from ``{{foo|bar|baz}}`` produces ``{{foo|2=baz}}`` because
        ``{{foo|baz}}`` is incorrect.

        If the parameter shows up multiple times in the template and *param* is
        not a :class:`.Parameter` object, we will remove all instances of it
        (and keep only one if *keep_field* is ``True`` - the first instance if
        none have dependents, otherwise the one with dependents will be kept).
        (and keep only one if *keep_field* is ``True`` - either the one with a
        hidden name, if it exists, or the first instance).
        """
        if isinstance(param, Parameter):
            return self._remove_exact(param, keep_field)

        name = str(param).strip()
        removed = False
        to_remove = []

        for i, param in enumerate(self.params):
            if param.name.strip() == name:
                if keep_field:
                    if self._remove_with_field(param, i, name):
                        self._blank_param_value(param.value)
                        keep_field = False
                    else:
                        to_remove.append(i)
                else:
                    if self._remove_without_field(param, i):
                    if self._should_remove(i, name):
                        to_remove.append(i)
                    else:
                        self._blank_param_value(param.value)
                        keep_field = False
                else:
                    self._fix_dependendent_params(i)
                    to_remove.append(i)
                if not removed:
                    removed = True

        if not removed:
            raise ValueError(name)
        for i in reversed(to_remove):
--- a/mwparserfromhell/parser/__init__.py
+++ b/mwparserfromhell/parser/__init__.py
@@ -40,11 +40,11 @@ class ParserError(Exception):


 from .builder import Builder
 from .tokenizer import Tokenizer
 try:
    from ._tokenizer import CTokenizer
    use_c = True
 except ImportError:
    from .tokenizer import Tokenizer
    CTokenizer = None
    use_c = False

@@ -70,6 +70,7 @@ class Parser(object):
        if use_c and CTokenizer:
            self._tokenizer = CTokenizer()
        else:
            from .tokenizer import Tokenizer
            self._tokenizer = Tokenizer()
        self._builder = Builder()

--- a/mwparserfromhell/parser/contexts.py
+++ b/mwparserfromhell/parser/contexts.py
@@ -89,6 +89,7 @@ Local (stack-specific) contexts:
    * :const:`FAIL_ON_LBRACE`
    * :const:`FAIL_ON_RBRACE`
    * :const:`FAIL_ON_EQUALS`
    * :const:`HAS_TEMPLATE`

 * :const:`TABLE`

@@ -161,15 +162,16 @@ FAIL_NEXT  =     1 << 26
 FAIL_ON_LBRACE = 1 << 27
 FAIL_ON_RBRACE = 1 << 28
 FAIL_ON_EQUALS = 1 << 29
 HAS_TEMPLATE =   1 << 30
 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
                FAIL_ON_RBRACE + FAIL_ON_EQUALS)

 TABLE_OPEN =       1 << 30
 TABLE_CELL_OPEN =  1 << 31
 TABLE_CELL_STYLE = 1 << 32
 TABLE_ROW_OPEN =   1 << 33
 TABLE_TD_LINE =    1 << 34
 TABLE_TH_LINE =    1 << 35
                FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE)

 TABLE_OPEN =       1 << 31
 TABLE_CELL_OPEN =  1 << 32
 TABLE_CELL_STYLE = 1 << 33
 TABLE_ROW_OPEN =   1 << 34
 TABLE_TD_LINE =    1 << 35
 TABLE_TH_LINE =    1 << 36
 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE
 TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
         TABLE_TD_LINE + TABLE_TH_LINE)
--- a/mwparserfromhell/parser/ctokenizer/contexts.h
+++ b/mwparserfromhell/parser/ctokenizer/contexts.h
@@ -63,22 +63,23 @@ SOFTWARE.

 #define LC_DLTERM                   0x0000000000800000

 #define LC_SAFETY_CHECK             0x000000003F000000
 #define LC_SAFETY_CHECK             0x000000007F000000
 #define LC_HAS_TEXT                 0x0000000001000000
 #define LC_FAIL_ON_TEXT             0x0000000002000000
 #define LC_FAIL_NEXT                0x0000000004000000
 #define LC_FAIL_ON_LBRACE           0x0000000008000000
 #define LC_FAIL_ON_RBRACE           0x0000000010000000
 #define LC_FAIL_ON_EQUALS           0x0000000020000000

 #define LC_TABLE                    0x0000000FC0000000
 #define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000
 #define LC_TABLE_OPEN               0x0000000040000000
 #define LC_TABLE_CELL_OPEN          0x0000000080000000
 #define LC_TABLE_CELL_STYLE         0x0000000100000000
 #define LC_TABLE_ROW_OPEN           0x0000000200000000
 #define LC_TABLE_TD_LINE            0x0000000400000000
 #define LC_TABLE_TH_LINE            0x0000000800000000
 #define LC_HAS_TEMPLATE             0x0000000040000000

 #define LC_TABLE                    0x0000001F80000000
 #define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000
 #define LC_TABLE_OPEN               0x0000000080000000
 #define LC_TABLE_CELL_OPEN          0x0000000100000000
 #define LC_TABLE_CELL_STYLE         0x0000000200000000
 #define LC_TABLE_ROW_OPEN           0x0000000400000000
 #define LC_TABLE_TD_LINE            0x0000000800000000
 #define LC_TABLE_TH_LINE            0x0000001000000000

 /* Global contexts */

--- a/mwparserfromhell/parser/ctokenizer/tok_parse.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c
@@ -121,12 +121,16 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr)
 /*
    Parse a template at the head of the wikicode string.
 */
 static int Tokenizer_parse_template(Tokenizer* self)
 static int Tokenizer_parse_template(Tokenizer* self, int has_content)
 {
    PyObject *template;
    Py_ssize_t reset = self->head;
    uint64_t context = LC_TEMPLATE_NAME;

    template = Tokenizer_parse(self, LC_TEMPLATE_NAME, 1);
    if (has_content)
        context |= LC_HAS_TEMPLATE;

    template = Tokenizer_parse(self, context, 1);
    if (BAD_ROUTE) {
        self->head = reset;
        return 0;
@@ -182,6 +186,7 @@ static int Tokenizer_parse_argument(Tokenizer* self)
 static int Tokenizer_parse_template_or_argument(Tokenizer* self)
 {
    unsigned int braces = 2, i;
    int has_content = 0;
    PyObject *tokenlist;

    self->head += 2;
@@ -198,7 +203,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
            return 0;
        }
        if (braces == 2) {
            if (Tokenizer_parse_template(self))
            if (Tokenizer_parse_template(self, has_content))
                return -1;
            if (BAD_ROUTE) {
                RESET_ROUTE();
@@ -212,7 +217,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
            return -1;
        if (BAD_ROUTE) {
            RESET_ROUTE();
            if (Tokenizer_parse_template(self))
            if (Tokenizer_parse_template(self, has_content))
                return -1;
            if (BAD_ROUTE) {
                char text[MAX_BRACES + 1];
@@ -228,8 +233,10 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
        }
        else
            braces -= 3;
        if (braces)
        if (braces) {
            has_content = 1;
            self->head++;
        }
    }
    tokenlist = Tokenizer_pop(self);
    if (!tokenlist)
@@ -251,8 +258,13 @@ static int Tokenizer_handle_template_param(Tokenizer* self)
 {
    PyObject *stack;

    if (self->topstack->context & LC_TEMPLATE_NAME)
    if (self->topstack->context & LC_TEMPLATE_NAME) {
        if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) {
            Tokenizer_fail_route(self);
            return -1;
        }
        self->topstack->context ^= LC_TEMPLATE_NAME;
    }
    else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE)
        self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE;
    if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
@@ -303,7 +315,11 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self)
 {
    PyObject* stack;

    if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
    if (self->topstack->context & LC_TEMPLATE_NAME) {
        if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE)))
            return Tokenizer_fail_route(self);
    }
    else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) {
        stack = Tokenizer_pop_keeping_context(self);
        if (!stack)
            return NULL;
@@ -2428,30 +2444,26 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
    if (context & LC_TAG_CLOSE)
        return (data == '<') ? -1 : 0;
    if (context & LC_TEMPLATE_NAME) {
        if (data == '{' || data == '}' || data == '[') {
        if (data == '{') {
            self->topstack->context |= LC_HAS_TEMPLATE | LC_FAIL_NEXT;
            return 0;
        }
        if (data == '}' || (data == '<' && Tokenizer_READ(self, 1) == '!')) {
            self->topstack->context |= LC_FAIL_NEXT;
            return 0;
        }
        if (data == ']' || data == '>' || (data == '<' &&
                                           Tokenizer_READ(self, 1) != '!')) {
        if (data == '[' || data == ']' || data == '<' || data == '>') {
            return -1;
        }
        if (data == '|')
            return 0;
        if (context & LC_HAS_TEXT) {
            if (context & LC_FAIL_ON_TEXT) {
                if (!Py_UNICODE_ISSPACE(data)) {
                    if (data == '<' && Tokenizer_READ(self, 1) == '!') {
                        self->topstack->context |= LC_FAIL_NEXT;
                        return 0;
                    }
                if (!Py_UNICODE_ISSPACE(data))
                    return -1;
                }
            }
            else {
                if (data == '\n')
                    self->topstack->context |= LC_FAIL_ON_TEXT;
            }
            else if (data == '\n')
                self->topstack->context |= LC_FAIL_ON_TEXT;
        }
        else if (!Py_UNICODE_ISSPACE(data))
            self->topstack->context |= LC_HAS_TEXT;
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -192,11 +192,14 @@ class Tokenizer(object):
                self._fail_route()
            return self.END

    def _parse_template(self):
    def _parse_template(self, has_content):
        """Parse a template at the head of the wikicode string."""
        reset = self._head
        context = contexts.TEMPLATE_NAME
        if has_content:
            context |= contexts.HAS_TEMPLATE
        try:
            template = self._parse(contexts.TEMPLATE_NAME)
            template = self._parse(context)
        except BadRoute:
            self._head = reset
            raise
@@ -223,6 +226,7 @@ class Tokenizer(object):
        while self._read() == "{":
            self._head += 1
            braces += 1
        has_content = False
        self._push()

        while braces:
@@ -230,7 +234,7 @@ class Tokenizer(object):
                return self._emit_text_then_stack("{")
            if braces == 2:
                try:
                    self._parse_template()
                    self._parse_template(has_content)
                except BadRoute:
                    return self._emit_text_then_stack("{{")
                break
@@ -239,11 +243,12 @@ class Tokenizer(object):
                braces -= 3
            except BadRoute:
                try:
                    self._parse_template()
                    self._parse_template(has_content)
                    braces -= 2
                except BadRoute:
                    return self._emit_text_then_stack("{" * braces)
            if braces:
                has_content = True
                self._head += 1

        self._emit_all(self._pop())
@@ -253,6 +258,8 @@ class Tokenizer(object):
    def _handle_template_param(self):
        """Handle a template parameter at the head of the string."""
        if self._context & contexts.TEMPLATE_NAME:
            if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE):
                self._fail_route()
            self._context ^= contexts.TEMPLATE_NAME
        elif self._context & contexts.TEMPLATE_PARAM_VALUE:
            self._context ^= contexts.TEMPLATE_PARAM_VALUE
@@ -271,7 +278,10 @@ class Tokenizer(object):

    def _handle_template_end(self):
        """Handle the end of a template at the head of the string."""
        if self._context & contexts.TEMPLATE_PARAM_KEY:
        if self._context & contexts.TEMPLATE_NAME:
            if not self._context & (contexts.HAS_TEXT | contexts.HAS_TEMPLATE):
                self._fail_route()
        elif self._context & contexts.TEMPLATE_PARAM_KEY:
            self._emit_all(self._pop(keep_context=True))
        self._head += 1
        return self._pop()
@@ -1183,23 +1193,22 @@ class Tokenizer(object):
        elif context & contexts.EXT_LINK_TITLE:
            return this != "\n"
        elif context & contexts.TEMPLATE_NAME:
            if this == "{" or this == "}" or this == "[":
            if this == "{":
                self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT
                return True
            if this == "}" or (this == "<" and self._read(1) == "!"):
                self._context |= contexts.FAIL_NEXT
                return True
            if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"):
            if this == "[" or this == "]" or this == "<" or this == ">":
                return False
            if this == "|":
                return True
            if context & contexts.HAS_TEXT:
                if context & contexts.FAIL_ON_TEXT:
                    if this is self.END or not this.isspace():
                        if this == "<" and self._read(1) == "!":
                            self._context |= contexts.FAIL_NEXT
                            return True
                        return False
                else:
                    if this == "\n":
                        self._context |= contexts.FAIL_ON_TEXT
                elif this == "\n":
                    self._context |= contexts.FAIL_ON_TEXT
            elif this is self.END or not this.isspace():
                self._context |= contexts.HAS_TEXT
            return True
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -115,8 +115,8 @@ class TestDocs(unittest.TestCase):
    @unittest.skipIf("NOWEB" in os.environ, "web test disabled by environ var")
    def test_readme_5(self):
        """test a block of example code in the README; includes a web call"""
        url1 = "http://en.wikipedia.org/w/api.php"
        url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw"
        url1 = "https://en.wikipedia.org/w/api.php"
        url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw"
        title = "Test"
        data = {"action": "query", "prop": "revisions", "rvlimit": 1,
                "rvprop": "content", "format": "json", "titles": title}
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -213,6 +213,9 @@ class TestTemplate(TreeEqualityTestCase):
                                          pgens("f", "g")])
        node37 = Template(wraptext("a"), [pgenh("1", "")])
        node38 = Template(wraptext("abc"))
        node39 = Template(wraptext("a"), [pgenh("1", " b ")])
        node40 = Template(wraptext("a"), [pgenh("1", " b"), pgenh("2", " c")])
        node41 = Template(wraptext("a"), [pgens("1", " b"), pgens("2", " c")])

        node1.add("e", "f", showkey=True)
        node2.add(2, "g", showkey=False)
@@ -255,6 +258,9 @@ class TestTemplate(TreeEqualityTestCase):
        node37.add(1, "b")
        node38.add("1", "foo")
        self.assertRaises(ValueError, node38.add, "z", "bar", showkey=False)
        node39.add("1", "c")
        node40.add("3", "d")
        node41.add("3", "d")

        self.assertEqual("{{a|b=c|d|e=f}}", node1)
        self.assertEqual("{{a|b=c|d|g}}", node2)
@@ -299,6 +305,9 @@ class TestTemplate(TreeEqualityTestCase):
        self.assertEqual("{{a|b=c|d=h|f=g}}", node36)
        self.assertEqual("{{a|b}}", node37)
        self.assertEqual("{{abc|foo}}", node38)
        self.assertEqual("{{a|c}}", node39)
        self.assertEqual("{{a| b| c|d}}", node40)
        self.assertEqual("{{a|1= b|2= c|3= d}}", node41)

    def test_remove(self):
        """test Template.remove()"""
@@ -395,13 +404,13 @@ class TestTemplate(TreeEqualityTestCase):
        self.assertRaises(ValueError, node2.remove, "1")
        self.assertEqual("{{foo}}", node2)
        self.assertEqual("{{foo||abc=}}", node3)
        self.assertEqual("{{foo||baz}}", node4)
        self.assertEqual("{{foo|2=baz}}", node4)
        self.assertEqual("{{foo|b=c}}", node5)
        self.assertEqual("{{foo| a=|b=c}}", node6)
        self.assertEqual("{{foo|1  =|2=c}}", node7)
        self.assertEqual("{{foo|2=c}}", node8)
        self.assertEqual("{{foo||c}}", node9)
        self.assertEqual("{{foo||c}}", node10)
        self.assertEqual("{{foo|2=c}}", node10)
        self.assertEqual("{{foo|b=c|a =d}}", node11)
        self.assertEqual("{{foo| a=|b=c|a =d}}", node12)
        self.assertEqual("{{foo| a=b|a =d}}", node13)
@@ -410,7 +419,7 @@ class TestTemplate(TreeEqualityTestCase):
        self.assertEqual("{{foo| a=b|b=c|a =}}", node16)
        self.assertEqual("{{foo|b|c}}", node17)
        self.assertEqual("{{foo|1  =|b|c}}", node18)
        self.assertEqual("{{foo|1  =a||c}}", node19)
        self.assertEqual("{{foo|1  =a|2=c}}", node19)
        self.assertEqual("{{foo|1  =a||c}}", node20)
        self.assertEqual("{{foo|c=d|e=f}}", node21)
        self.assertEqual("{{foo|a=|c=d|e=f}}", node22)
--- a/tests/tokenizer/integration.mwtest
+++ b/tests/tokenizer/integration.mwtest
@@ -244,6 +244,13 @@ output: [Text(text="{{foobar\n<!|key=value}}")]

 ---

 name:   newline_and_comment_in_template_name_8
 label:  a template name containing a newline followed by a comment
 input:  "{{<!-- comment -->\nfoobar\n<!-- comment -->}}"
 output: [TemplateOpen(), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\nfoobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()]

 ---

 name:   tag_in_link_title
 label:  HTML tags are invalid in link titles, even when complete
 input:  "[[foo<i>bar</i>baz]]"
--- a/tests/tokenizer/templates.mwtest
+++ b/tests/tokenizer/templates.mwtest
@@ -1,17 +1,3 @@
 name:   blank
 label:  template with no content
 input:  "{{}}"
 output: [TemplateOpen(), TemplateClose()]


 name:   blank_with_params
 label:  template with no content, but pipes and equal signs
 input:  "{{||=|}}"
 output: [TemplateOpen(), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()]


 name:   no_params
 label:  simplest type of template
 input:  "{{template}}"
@@ -61,6 +47,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="

 ---

 name:   blank_params
 label:  template with blank parameters (mix of pipes and equal signs)
 input:  "{{,||=|}}"
 output: [TemplateOpen(), Text(text=","), TemplateParamSeparator(), TemplateParamSeparator(), TemplateParamEquals(), TemplateParamSeparator(), TemplateClose()]

 ---

 name:   nested_unnamed_param
 label:  nested template as an unnamed parameter
 input:  "{{foo|{{bar}}}}"
@@ -390,6 +383,34 @@ output: [TemplateOpen(), Text(text="foo\n  "), TemplateParamSeparator(), Text(te

 ---

 name:   invalid_blank
 label:  invalid template with no content
 input:  "{{}}"
 output: [Text(text="{{}}")]

 ---

 name:   invalid_blank_whitespace
 label:  invalid template with no content, but whitespace
 input:  "{{  }}"
 output: [Text(text="{{  }}")]

 ---

 name:   invalid_blank_pipe
 label:  invalid template with no content, but a parameter
 input:  "{{|foo}}"
 output: [Text(text="{{|foo}}")]

 ---

 name:   invalid_blank_whitespace_pipe
 label:  invalid template with no content, but whitespace and a parameter
 input:  "{{  |foo}}"
 output: [Text(text="{{  |foo}}")]

 ---

 name:   invalid_name_left_brace_middle
 label:  invalid characters in template name: left brace in middle
 input:  "{{foo{bar}}"
@@ -665,5 +686,5 @@ output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{

 name:   recursion_opens_and_closes
 label:  test potentially dangerous recursion: template openings and closings
 input:  "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}"
 output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")]
 input:  "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}"
 output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), TemplateOpen(), Text(text="x"), TemplateParamSeparator(), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x"), TemplateParamSeparator(), Text(text="{{x"), TemplateClose(), Text(text="{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}")]