Merge develop into master (release/0.5.2)

5 лет назад · 623a895cb8
--- a/.coveragerc
+++ b/.coveragerc
@@ -6,4 +6,3 @@ partial_branches =
    pragma: no branch
    if py3k:
    if not py3k:
    if py26:
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,16 +1,12 @@
 language: python
 python:
    - 2.6
    - 2.7
    - 3.2
    - 3.3
    - 3.4
    - 3.5
    - 3.6
    - nightly
 sudo: false
 install:
    - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; then pip install coverage==3.7.1; fi
    - pip install coveralls
    - python setup.py build
 script:
--- a/+ 10
+++ b/+ 10
@@ -1,4 +1,13 @@
 v0.5.1 (released March 03, 2018):
 v0.5.2 (released November 1, 2018):

 - Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204)
 - Fixed signals getting stuck inside the C tokenizer until parsing finishes,
  in pathological cases. (#206)
 - Fixed <wbr> not being considered a single-only tag. (#200)
 - Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (#208)
 - Cleaned up some minor documentation issues. (#207)

 v0.5.1 (released March 3, 2018):

 - Improved behavior when adding parameters to templates (via Template.add())
  with poorly formatted whitespace conventions. (#185)
--- a/README.rst
+++ b/README.rst
@@ -35,81 +35,81 @@ You can run the comprehensive unit testing suite with
 Usage
 -----

 Normal usage is rather straightforward (where ``text`` is page text)::
 Normal usage is rather straightforward (where ``text`` is page text):

    >>> import mwparserfromhell
    >>> wikicode = mwparserfromhell.parse(text)
 >>> import mwparserfromhell
 >>> wikicode = mwparserfromhell.parse(text)

 ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an
 ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods.
 For example::

    >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
    >>> wikicode = mwparserfromhell.parse(text)
    >>> print(wikicode)
    I has a template! {{foo|bar|baz|eggs=spam}} See it?
    >>> templates = wikicode.filter_templates()
    >>> print(templates)
    ['{{foo|bar|baz|eggs=spam}}']
    >>> template = templates[0]
    >>> print(template.name)
    foo
    >>> print(template.params)
    ['bar', 'baz', 'eggs=spam']
    >>> print(template.get(1).value)
    bar
    >>> print(template.get("eggs").value)
    spam

 Since nodes can contain other nodes, getting nested templates is trivial::

    >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"
    >>> mwparserfromhell.parse(text).filter_templates()
    ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']
 For example:

 >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
 >>> wikicode = mwparserfromhell.parse(text)
 >>> print(wikicode)
 I has a template! {{foo|bar|baz|eggs=spam}} See it?
 >>> templates = wikicode.filter_templates()
 >>> print(templates)
 ['{{foo|bar|baz|eggs=spam}}']
 >>> template = templates[0]
 >>> print(template.name)
 foo
 >>> print(template.params)
 ['bar', 'baz', 'eggs=spam']
 >>> print(template.get(1).value)
 bar
 >>> print(template.get("eggs").value)
 spam

 Since nodes can contain other nodes, getting nested templates is trivial:

 >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"
 >>> mwparserfromhell.parse(text).filter_templates()
 ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']

 You can also pass ``recursive=False`` to ``filter_templates()`` and explore
 templates manually. This is possible because nodes can contain additional
 ``Wikicode`` objects::

    >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
    >>> print(code.filter_templates(recursive=False))
    ['{{foo|this {{includes a|template}}}}']
    >>> foo = code.filter_templates(recursive=False)[0]
    >>> print(foo.get(1).value)
    this {{includes a|template}}
    >>> print(foo.get(1).value.filter_templates()[0])
    {{includes a|template}}
    >>> print(foo.get(1).value.filter_templates()[0].get(1).value)
    template
 ``Wikicode`` objects:

 >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
 >>> print(code.filter_templates(recursive=False))
 ['{{foo|this {{includes a|template}}}}']
 >>> foo = code.filter_templates(recursive=False)[0]
 >>> print(foo.get(1).value)
 this {{includes a|template}}
 >>> print(foo.get(1).value.filter_templates()[0])
 {{includes a|template}}
 >>> print(foo.get(1).value.filter_templates()[0].get(1).value)
 template

 Templates can be easily modified to add, remove, or alter params. ``Wikicode``
 objects can be treated like lists, with ``append()``, ``insert()``,
 ``remove()``, ``replace()``, and more. They also have a ``matches()`` method
 for comparing page or template names, which takes care of capitalization and
 whitespace::

    >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"
    >>> code = mwparserfromhell.parse(text)
    >>> for template in code.filter_templates():
    ...     if template.name.matches("Cleanup") and not template.has("date"):
    ...         template.add("date", "July 2012")
    ...
    >>> print(code)
    {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}
    >>> code.replace("{{uncategorized}}", "{{bar-stub}}")
    >>> print(code)
    {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
    >>> print(code.filter_templates())
    ['{{cleanup|date=July 2012}}', '{{bar-stub}}']
 whitespace:

 >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"
 >>> code = mwparserfromhell.parse(text)
 >>> for template in code.filter_templates():
 ...     if template.name.matches("Cleanup") and not template.has("date"):
 ...         template.add("date", "July 2012")
 ...
 >>> print(code)
 {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}
 >>> code.replace("{{uncategorized}}", "{{bar-stub}}")
 >>> print(code)
 {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
 >>> print(code.filter_templates())
 ['{{cleanup|date=July 2012}}', '{{bar-stub}}']

 You can then convert ``code`` back into a regular ``str`` object (for
 saving the page!) by calling ``str()`` on it::
 saving the page!) by calling ``str()`` on it:

    >>> text = str(code)
    >>> print(text)
    {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
    >>> text == code
    True
 >>> text = str(code)
 >>> print(text)
 {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
 >>> text == code
 True

 Likewise, use ``unicode(code)`` in Python 2.

@@ -164,7 +164,9 @@ Integration
 ``Page`` objects have a ``parse`` method that essentially calls
 ``mwparserfromhell.parse()`` on ``page.get()``.

 If you're using Pywikibot_, your code might look like this::
 If you're using Pywikibot_, your code might look like this:

 .. code-block:: python

    import mwparserfromhell
    import pywikibot
@@ -176,7 +178,9 @@ If you're using Pywikibot_, your code might look like this::
        return mwparserfromhell.parse(text)

 If you're not using a library, you can parse any page using the following
 Python 3 code (via the API_)::
 Python 3 code (via the API_):

 .. code-block:: python

    import json
    from urllib.parse import urlencode
@@ -189,11 +193,11 @@ Python 3 code (via the API_)::
                "rvprop": "content", "format": "json", "titles": title}
        raw = urlopen(API_URL, urlencode(data).encode()).read()
        res = json.loads(raw)
        text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
        text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
        return mwparserfromhell.parse(text)

 .. _MediaWiki:              http://mediawiki.org
 .. _ReadTheDocs:            http://mwparserfromhell.readthedocs.org
 .. _ReadTheDocs:            http://mwparserfromhell.readthedocs.io
 .. _Earwig:                 http://en.wikipedia.org/wiki/User:The_Earwig
 .. _Σ:                      http://en.wikipedia.org/wiki/User:%CE%A3
 .. _Legoktm:                http://en.wikipedia.org/wiki/User:Legoktm
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,6 +1,6 @@
 # This config file is used by appveyor.com to build Windows release binaries

 version: 0.5.1-b{build}
 version: 0.5.2-b{build}

 branches:
  only:
@@ -13,8 +13,9 @@ environment:
  global:
    # See: http://stackoverflow.com/a/13751649/163740
    WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd"
    PIP:     "%WRAPPER% %PYTHON%\\Scripts\\pip.exe"
    SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension"
    PIP:     "%WRAPPER% %PYTHON%\\python.exe -m pip"
    SETUPPY: "%WRAPPER% %PYTHON%\\python.exe setup.py --with-extension"
    TWINE:   "%WRAPPER% %PYTHON%\\python.exe -m twine"
    PYPI_USERNAME: "earwigbot"
    PYPI_PASSWORD:
      secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+
@@ -28,14 +29,6 @@ environment:
      PYTHON_VERSION: "2.7"
      PYTHON_ARCH:    "64"

    - PYTHON:         "C:\\Python33"
      PYTHON_VERSION: "3.3"
      PYTHON_ARCH:    "32"

    - PYTHON:         "C:\\Python33-x64"
      PYTHON_VERSION: "3.3"
      PYTHON_ARCH:    "64"

    - PYTHON:         "C:\\Python34"
      PYTHON_VERSION: "3.4"
      PYTHON_ARCH:    "32"
@@ -60,6 +53,14 @@ environment:
      PYTHON_VERSION: "3.6"
      PYTHON_ARCH:    "64"

    - PYTHON:         "C:\\Python37"
      PYTHON_VERSION: "3.7"
      PYTHON_ARCH:    "32"

    - PYTHON:         "C:\\Python37-x64"
      PYTHON_VERSION: "3.7"
      PYTHON_ARCH:    "64"

 install:
  - "%PIP% install --disable-pip-version-check --user --upgrade pip"
  - "%PIP% install wheel twine"
@@ -74,7 +75,7 @@ after_test:
  - "%SETUPPY% bdist_wheel"

 on_success:
  - "IF %APPVEYOR_REPO_BRANCH%==master %WRAPPER% %PYTHON%\\python -m twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%"
  - "IF %APPVEYOR_REPO_BRANCH%==master %TWINE% upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%"

 artifacts:
  - path: dist\*
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,10 +1,29 @@
 Changelog
 =========

 v0.5.2
 ------

 `Released November 1, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.2>`_
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.1...v0.5.2>`__):

 - Dropped support for end-of-life Python versions 2.6, 3.2, 3.3.
  (`#199 <https://github.com/earwig/mwparserfromhell/issues/199>`_,
  `#204 <https://github.com/earwig/mwparserfromhell/pull/204>`_)
 - Fixed signals getting stuck inside the C tokenizer until parsing finishes,
  in pathological cases.
  (`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_)
 - Fixed ``<wbr>`` not being considered a single-only tag.
  (`#200 <https://github.com/earwig/mwparserfromhell/pull/200>`_)
 - Fixed a C tokenizer crash on Python 3.7 when compiled with assertions.
  (`#208 <https://github.com/earwig/mwparserfromhell/issues/208>`_)
 - Cleaned up some minor documentation issues.
  (`#207 <https://github.com/earwig/mwparserfromhell/pull/207>`_)

 v0.5.1
 ------

 `Released March 03, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_
 `Released March 3, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_
 (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...v0.5.1>`__):

 - Improved behavior when adding parameters to templates (via
--- a/docs/integration.rst
+++ b/docs/integration.rst
@@ -32,7 +32,7 @@ If you're not using a library, you can parse any page using the following code
                "rvprop": "content", "format": "json", "titles": title}
        raw = urlopen(API_URL, urlencode(data).encode()).read()
        res = json.loads(raw)
        text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
        text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
        return mwparserfromhell.parse(text)

 .. _EarwigBot:            https://github.com/earwig/earwigbot
--- a/mwparserfromhell/init.py
+++ b/mwparserfromhell/init.py
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
 __author__ = "Ben Kurtovic"
 __copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic"
 __license__ = "MIT License"
 __version__ = "0.5.1"
 __version__ = "0.5.2"
 __email__ = "ben.kurtovic@gmail.com"

 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
--- a/mwparserfromhell/compat.py
+++ b/mwparserfromhell/compat.py
@@ -10,9 +10,7 @@ meant to be imported directly from within the parser's modules.

 import sys

 py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6)
 py3k = (sys.version_info[0] == 3)
 py32 = py3k and (sys.version_info[1] == 2)

 if py3k:
    bytes = bytes
--- a/mwparserfromhell/definitions.py
+++ b/mwparserfromhell/definitions.py
@@ -56,8 +56,8 @@ INVISIBLE_TAGS = [
    "section", "templatedata", "timeline"
 ]

 # [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762
 SINGLE_ONLY = ["br", "hr", "meta", "link", "img"]
 # [mediawiki/core.git]/includes/Sanitizer.php @ 065bec63ea
 SINGLE_ONLY = ["br", "hr", "meta", "link", "img", "wbr"]
 SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"]

 MARKUP_TO_HTML = {
--- a/mwparserfromhell/nodes/extras/attribute.py
+++ b/mwparserfromhell/nodes/extras/attribute.py
@@ -40,7 +40,7 @@ class Attribute(StringMixIn):
                 pad_before_eq="", pad_after_eq="", check_quotes=True):
        super(Attribute, self).__init__()
        if check_quotes and not quotes and self._value_needs_quotes(value):
            raise ValueError("given value {0!r} requires quotes".format(value))
            raise ValueError("given value {!r} requires quotes".format(value))
        self._name = name
        self._value = value
        self._quotes = quotes
@@ -79,7 +79,7 @@ class Attribute(StringMixIn):
        """Coerce a quote type into an acceptable value, or raise an error."""
        orig, quotes = quotes, str(quotes) if quotes else None
        if quotes not in [None, '"', "'"]:
            raise ValueError("{0!r} is not a valid quote type".format(orig))
            raise ValueError("{!r} is not a valid quote type".format(orig))
        return quotes

    @property
--- a/mwparserfromhell/nodes/extras/parameter.py
+++ b/mwparserfromhell/nodes/extras/parameter.py
@@ -41,7 +41,7 @@ class Parameter(StringMixIn):
    def __init__(self, name, value, showkey=True):
        super(Parameter, self).__init__()
        if not showkey and not self.can_hide_key(name):
            raise ValueError("key {0!r} cannot be hidden".format(name))
            raise ValueError("key {!r} cannot be hidden".format(name))
        self._name = name
        self._value = value
        self._showkey = showkey
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -53,10 +53,10 @@ class HTMLEntity(Node):

    def __unicode__(self):
        if self.named:
            return "&{0};".format(self.value)
            return "&{};".format(self.value)
        if self.hexadecimal:
            return "&#{0}{1};".format(self.hex_char, self.value)
        return "&#{0};".format(self.value)
            return "&#{}{};".format(self.hex_char, self.value)
        return "&#{};".format(self.value)

    def __strip__(self, **kwargs):
        if kwargs.get("normalize"):
--- a/mwparserfromhell/parser/init.py
+++ b/mwparserfromhell/parser/init.py
@@ -35,7 +35,7 @@ class ParserError(Exception):
    can happen. Its appearance indicates a bug.
    """
    def __init__(self, extra):
        msg = "This is a bug and should be reported. Info: {0}.".format(extra)
        msg = "This is a bug and should be reported. Info: {}.".format(extra)
        super(ParserError, self).__init__(msg)


--- a/mwparserfromhell/parser/ctokenizer/definitions.c
+++ b/mwparserfromhell/parser/ctokenizer/definitions.c
@@ -45,11 +45,12 @@ static const char* PARSER_BLACKLIST[] = {
 };

 static const char* SINGLE[] = {
    "br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL
    "br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr",
    "wbr", NULL
 };

 static const char* SINGLE_ONLY[] = {
    "br", "hr", "meta", "link", "img", NULL
    "br", "hr", "meta", "link", "img", "wbr", NULL
 };

 /*
--- a/mwparserfromhell/parser/ctokenizer/tok_parse.c
+++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c
@@ -2603,6 +2603,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
        }
        if (!this)
            return Tokenizer_handle_end(self, this_context);
        if (PyErr_CheckSignals())
            return NULL;
        next = Tokenizer_read(self, 1);
        last = Tokenizer_read_backwards(self, 1);
        if (this == next && next == '{') {
--- a/mwparserfromhell/parser/ctokenizer/tokenizer.c
+++ b/mwparserfromhell/parser/ctokenizer/tokenizer.c
@@ -207,7 +207,7 @@ static int load_entities(void)
    if (!deflist)
        return -1;
    Py_DECREF(defmap);
    numdefs = (unsigned) PyList_GET_SIZE(defmap);
    numdefs = (unsigned) PyList_GET_SIZE(deflist);
    entitydefs = calloc(numdefs + 1, sizeof(char*));
    if (!entitydefs)
        return -1;
--- a/mwparserfromhell/parser/tokens.py
+++ b/mwparserfromhell/parser/tokens.py
@@ -44,7 +44,7 @@ class Token(dict):
                args.append(key + "=" + repr(value[:97] + "..."))
            else:
                args.append(key + "=" + repr(value))
        return "{0}({1})".format(type(self).__name__, ", ".join(args))
        return "{}({})".format(type(self).__name__, ", ".join(args))

    def __eq__(self, other):
        return isinstance(other, type(self)) and dict.__eq__(self, other)
--- a/mwparserfromhell/string_mixin.py
+++ b/mwparserfromhell/string_mixin.py
@@ -28,7 +28,7 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner.
 from __future__ import unicode_literals
 from sys import getdefaultencoding

 from .compat import bytes, py26, py3k, str
 from .compat import bytes, py3k, str

 __all__ = ["StringMixIn"]

@@ -109,21 +109,12 @@ class StringMixIn(object):

    def __getattr__(self, attr):
        if not hasattr(str, attr):
            raise AttributeError("{0!r} object has no attribute {1!r}".format(
            raise AttributeError("{!r} object has no attribute {!r}".format(
                type(self).__name__, attr))
        return getattr(self.__unicode__(), attr)

    if py3k:
        maketrans = str.maketrans  # Static method can't rely on __getattr__

    if py26:
        @inheritdoc
        def encode(self, encoding=None, errors=None):
            if encoding is None:
                encoding = getdefaultencoding()
            if errors is not None:
                return self.__unicode__().encode(encoding, errors)
            return self.__unicode__().encode(encoding)


 del inheritdoc
--- a/scripts/memtest.py
+++ b/scripts/memtest.py
@@ -80,7 +80,7 @@ class MemoryTest(object):
                    raw = raw.encode("raw_unicode_escape")
                    data["input"] = raw.decode("unicode_escape")
            number = str(counter).zfill(digits)
            fname = "test_{0}{1}_{2}".format(name, number, data["name"])
            fname = "test_{}{}_{}".format(name, number, data["name"])
            self._tests.append((fname, data["input"]))
            counter += 1

@@ -117,7 +117,7 @@ class MemoryTest(object):
            tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)"
            sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt))
        else:
            sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET))
            sys.stdout.write("{}OK{}".format(Color.GREEN, Color.RESET))

    def run(self):
        """Run the memory test suite."""
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -9,7 +9,7 @@ fi

 VERSION=$1
 SCRIPT_DIR=$(dirname "$0")
 RELEASE_DATE=$(date +"%B %d, %Y")
 RELEASE_DATE=$(date +"%B %-d, %Y")

 check_git() {
    if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then
@@ -76,9 +76,8 @@ do_git_stuff() {
 }

 upload_to_pypi() {
    echo -n "PyPI: uploading source tarball and docs..."
    echo -n "PyPI: uploading source tarball..."
    python setup.py -q register sdist upload -s
    python setup.py -q upload_docs
    echo " done."
 }

@@ -88,7 +87,7 @@ post_release() {
    echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION"
    echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell"
    echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell"
    echo "*** Verify: https://mwparserfromhell.readthedocs.org"
    echo "*** Verify: https://mwparserfromhell.readthedocs.io"
    echo "*** Press enter to sanity-check the release."
    read
 }
--- a/setup.py
+++ b/setup.py
@@ -27,15 +27,15 @@ from glob import glob
 from os import environ
 import sys

 if ((sys.version_info[0] == 2 and sys.version_info[1] < 6) or
    (sys.version_info[1] == 3 and sys.version_info[1] < 2)):
    raise RuntimeError("mwparserfromhell needs Python 2.6+ or 3.2+")
 def is_supported_python(version_info=None):
    """Return True if *version_info* names a supported Python release.

    Supported releases are Python 2.7 and Python 3.4 or newer. When
    *version_info* is None, the running interpreter's ``sys.version_info``
    is used.
    """
    if version_info is None:
        version_info = sys.version_info
    major, minor = version_info[0], version_info[1]
    # The previous check compared the *minor* version against 3 where the
    # major version was meant, so Python 3.0-3.2 were never rejected.
    too_old_py2 = major == 2 and minor < 7
    too_old_py3 = major == 3 and minor < 4
    return not (too_old_py2 or too_old_py3)

 if not is_supported_python():
    raise RuntimeError("mwparserfromhell needs Python 2.7 or 3.4+")

 from setuptools import setup, find_packages, Extension
 from setuptools.command.build_ext import build_ext

 from mwparserfromhell import __version__
 from mwparserfromhell.compat import py26, py3k
 from mwparserfromhell.compat import py3k

 with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp:
    long_docs = fp.read()
@@ -76,21 +76,20 @@ if fallback:

 tokenizer = Extension("mwparserfromhell.parser._tokenizer",
                      sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")),
                      depends=glob("mwparserfromhell/parser/ctokenizer/*.h"))
                      depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h")))

 setup(
    name = "mwparserfromhell",
    packages = find_packages(exclude=("tests",)),
    ext_modules = [tokenizer] if use_extension else [],
    tests_require = ["unittest2"] if py26 else [],
    test_suite = "tests.discover",
    test_suite = "tests",
    version = __version__,
    author = "Ben Kurtovic",
    author_email = "ben.kurtovic@gmail.com",
    url = "https://github.com/earwig/mwparserfromhell",
    description = "MWParserFromHell is a parser for MediaWiki wikicode.",
    long_description = long_docs,
    download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__),
    download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__),
    keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing",
    license = "MIT License",
    classifiers = [
@@ -99,11 +98,9 @@ setup(
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 2.6",
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.2",
        "Programming Language :: Python :: 3.3",
        "Programming Language :: Python :: 3.4",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
--- a/tests/_test_tokenizer.py
+++ b/tests/_test_tokenizer.py
@@ -109,7 +109,7 @@ class TokenizerTestCase(object):
                print(error.format(filename))
                continue
            if data["input"] is None or data["output"] is None:
                error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
                error = "Test '{}' in '{}' was ignored because it lacked an input or an output"
                print(error.format(data["name"], filename))
                continue

@@ -118,7 +118,7 @@ class TokenizerTestCase(object):
            if restrict and data["name"] != restrict:
                continue

            fname = "test_{0}{1}_{2}".format(name, number, data["name"])
            fname = "test_{}{}_{}".format(name, number, data["name"])
            meth = cls._build_test_method(fname, data)
            setattr(cls, fname, meth)

@@ -126,7 +126,7 @@ class TokenizerTestCase(object):
    def build(cls):
        """Load and install all tests from the 'tokenizer' directory."""
        def load_file(filename, restrict=None):
            with codecs.open(filename, "rU", encoding="utf8") as fp:
            with codecs.open(filename, "r", encoding="utf8") as fp:
                text = fp.read()
                name = path.split(filename)[1][:-len(extension)]
                cls._load_tests(filename, name, text, restrict)
--- a/tests/_test_tree_equality.py
+++ b/tests/_test_tree_equality.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    from unittest2 import TestCase
 except ImportError:
    from unittest import TestCase
 from unittest import TestCase

 from mwparserfromhell.compat import range
 from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
--- a/tests/discover.py
+++ b/tests/discover.py
@@ -1,24 +0,0 @@
 # -*- coding: utf-8 -*-

 """
 Discover tests using ``unittest2`` for Python 2.6.

 It appears the default distutils test suite doesn't play nice with
 ``setUpClass`` thereby making some tests fail. Using ``unittest2`` to load
 tests seems to work around that issue.

 http://stackoverflow.com/a/17004409/753501
 """

 import os.path

 from mwparserfromhell.compat import py26

 if py26:
    import unittest2 as unittest
 else:
    import unittest

 def additional_tests():
    project_root = os.path.split(os.path.dirname(__file__))[0]
    return unittest.defaultTestLoader.discover(project_root)
--- a/tests/test_argument.py
+++ b/tests/test_argument.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Argument, Text
--- a/tests/test_attribute.py
+++ b/tests/test_attribute.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Template
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import py3k
 from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading,
--- a/tests/test_comment.py
+++ b/tests/test_comment.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Comment
--- a/tests/test_ctokenizer.py
+++ b/tests/test_ctokenizer.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 try:
    from mwparserfromhell.parser._tokenizer import CTokenizer
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -23,11 +23,7 @@
 from __future__ import print_function, unicode_literals
 import json
 import os

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 import mwparserfromhell
 from mwparserfromhell.compat import py3k, str
--- a/tests/test_external_link.py
+++ b/tests/test_external_link.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import ExternalLink, Text
--- a/tests/test_heading.py
+++ b/tests/test_heading.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Heading, Text
--- a/tests/test_html_entity.py
+++ b/tests/test_html_entity.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import HTMLEntity
--- a/tests/test_parameter.py
+++ b/tests/test_parameter.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Text
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell import parser
 from mwparserfromhell.compat import range
--- a/tests/test_pytokenizer.py
+++ b/tests/test_pytokenizer.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.parser.tokenizer import Tokenizer

--- a/tests/test_roundtripping.py
+++ b/tests/test_roundtripping.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from ._test_tokenizer import TokenizerTestCase

--- a/tests/test_smart_list.py
+++ b/tests/test_smart_list.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import py3k, range
 from mwparserfromhell.smart_list import SmartList, _ListProxy
@@ -139,36 +135,36 @@ class TestSmartList(unittest.TestCase):
            self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1))
            self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1))

        self.assertTrue(list1 < list3)
        self.assertTrue(list1 <= list3)
        self.assertFalse(list1 == list3)
        self.assertTrue(list1 != list3)
        self.assertFalse(list1 > list3)
        self.assertFalse(list1 >= list3)
        self.assertLess(list1, list3)
        self.assertLessEqual(list1, list3)
        self.assertNotEqual(list1, list3)
        self.assertNotEqual(list1, list3)
        self.assertLessEqual(list1, list3)
        self.assertLess(list1, list3)

        other1 = [0, 2, 3, 4]
        self.assertTrue(list1 < other1)
        self.assertTrue(list1 <= other1)
        self.assertFalse(list1 == other1)
        self.assertTrue(list1 != other1)
        self.assertFalse(list1 > other1)
        self.assertFalse(list1 >= other1)
        self.assertLess(list1, other1)
        self.assertLessEqual(list1, other1)
        self.assertNotEqual(list1, other1)
        self.assertNotEqual(list1, other1)
        self.assertLessEqual(list1, other1)
        self.assertLess(list1, other1)

        other2 = [0, 0, 1, 2]
        self.assertFalse(list1 < other2)
        self.assertFalse(list1 <= other2)
        self.assertFalse(list1 == other2)
        self.assertTrue(list1 != other2)
        self.assertTrue(list1 > other2)
        self.assertTrue(list1 >= other2)
        self.assertGreaterEqual(list1, other2)
        self.assertGreater(list1, other2)
        self.assertNotEqual(list1, other2)
        self.assertNotEqual(list1, other2)
        self.assertGreater(list1, other2)
        self.assertGreaterEqual(list1, other2)

        other3 = [0, 1, 2, 3, "one", "two"]
        self.assertFalse(list1 < other3)
        self.assertTrue(list1 <= other3)
        self.assertTrue(list1 == other3)
        self.assertFalse(list1 != other3)
        self.assertFalse(list1 > other3)
        self.assertTrue(list1 >= other3)
        self.assertGreaterEqual(list1, other3)
        self.assertLessEqual(list1, other3)
        self.assertEqual(list1, other3)
        self.assertEqual(list1, other3)
        self.assertLessEqual(list1, other3)
        self.assertGreaterEqual(list1, other3)

        self.assertTrue(bool(list1))
        self.assertFalse(bool(list2))
@@ -198,10 +194,10 @@ class TestSmartList(unittest.TestCase):
        self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
        self.assertEqual([], list(reversed(list2)))

        self.assertTrue("one" in list1)
        self.assertTrue(3 in list1)
        self.assertFalse(10 in list1)
        self.assertFalse(0 in list2)
        self.assertIn("one", list1)
        self.assertIn(3, list1)
        self.assertNotIn(10, list1)
        self.assertNotIn(0, list2)

        self.assertEqual([], list2 * 5)
        self.assertEqual([], 5 * list2)
--- a/tests/test_string_mixin.py
+++ b/tests/test_string_mixin.py
@@ -23,13 +23,9 @@
 from __future__ import unicode_literals
 from sys import getdefaultencoding
 from types import GeneratorType
 import unittest

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest

 from mwparserfromhell.compat import bytes, py3k, py32, range, str
 from mwparserfromhell.compat import bytes, py3k, range, str
 from mwparserfromhell.string_mixin import StringMixIn

 class _FakeString(StringMixIn):
@@ -54,9 +50,7 @@ class TestStringMixIn(unittest.TestCase):
            "rsplit", "rstrip", "split", "splitlines", "startswith", "strip",
            "swapcase", "title", "translate", "upper", "zfill"]
        if py3k:
            if not py32:
                methods.append("casefold")
            methods.extend(["format_map", "isidentifier", "isprintable",
            methods.extend(["casefold", "format_map", "isidentifier", "isprintable",
                            "maketrans"])
        else:
            methods.append("decode")
@@ -90,33 +84,33 @@ class TestStringMixIn(unittest.TestCase):
        str4 = "this is a fake string"
        str5 = "fake string, this is"

        self.assertFalse(str1 > str2)
        self.assertTrue(str1 >= str2)
        self.assertTrue(str1 == str2)
        self.assertFalse(str1 != str2)
        self.assertFalse(str1 < str2)
        self.assertTrue(str1 <= str2)

        self.assertTrue(str1 > str3)
        self.assertTrue(str1 >= str3)
        self.assertFalse(str1 == str3)
        self.assertTrue(str1 != str3)
        self.assertFalse(str1 < str3)
        self.assertFalse(str1 <= str3)

        self.assertFalse(str1 > str4)
        self.assertTrue(str1 >= str4)
        self.assertTrue(str1 == str4)
        self.assertFalse(str1 != str4)
        self.assertFalse(str1 < str4)
        self.assertTrue(str1 <= str4)

        self.assertFalse(str5 > str1)
        self.assertFalse(str5 >= str1)
        self.assertFalse(str5 == str1)
        self.assertTrue(str5 != str1)
        self.assertTrue(str5 < str1)
        self.assertTrue(str5 <= str1)
        self.assertLessEqual(str1, str2)
        self.assertGreaterEqual(str1, str2)
        self.assertEqual(str1, str2)
        self.assertEqual(str1, str2)
        self.assertGreaterEqual(str1, str2)
        self.assertLessEqual(str1, str2)

        self.assertGreater(str1, str3)
        self.assertGreaterEqual(str1, str3)
        self.assertNotEqual(str1, str3)
        self.assertNotEqual(str1, str3)
        self.assertGreaterEqual(str1, str3)
        self.assertGreater(str1, str3)

        self.assertLessEqual(str1, str4)
        self.assertGreaterEqual(str1, str4)
        self.assertEqual(str1, str4)
        self.assertEqual(str1, str4)
        self.assertGreaterEqual(str1, str4)
        self.assertLessEqual(str1, str4)

        self.assertLessEqual(str5, str1)
        self.assertLess(str5, str1)
        self.assertNotEqual(str5, str1)
        self.assertNotEqual(str5, str1)
        self.assertLess(str5, str1)
        self.assertLessEqual(str5, str1)

    def test_other_magics(self):
        """test other magically implemented features, like len() and iter()"""
@@ -161,13 +155,13 @@ class TestStringMixIn(unittest.TestCase):
        self.assertRaises(IndexError, lambda: str1[11])
        self.assertRaises(IndexError, lambda: str2[0])

        self.assertTrue("k" in str1)
        self.assertTrue("fake" in str1)
        self.assertTrue("str" in str1)
        self.assertTrue("" in str1)
        self.assertTrue("" in str2)
        self.assertFalse("real" in str1)
        self.assertFalse("s" in str2)
        self.assertIn("k", str1)
        self.assertIn("fake", str1)
        self.assertIn("str", str1)
        self.assertIn("", str1)
        self.assertIn("", str2)
        self.assertNotIn("real", str1)
        self.assertNotIn("s", str2)

    def test_other_methods(self):
        """test the remaining non-magic methods of StringMixIn"""
@@ -329,7 +323,7 @@ class TestStringMixIn(unittest.TestCase):
        self.assertEqual("", str15.lower())
        self.assertEqual("foobar", str16.lower())
        self.assertEqual("ß", str22.lower())
        if py3k and not py32:
        if py3k:
            self.assertEqual("", str15.casefold())
            self.assertEqual("foobar", str16.casefold())
            self.assertEqual("ss", str22.casefold())
@@ -378,7 +372,7 @@ class TestStringMixIn(unittest.TestCase):
        self.assertEqual(actual, str25.rsplit(None, 3))
        actual = ["   this is a   sentence with", "", "whitespace", ""]
        self.assertEqual(actual, str25.rsplit(" ", 3))
        if py3k and not py32:
        if py3k:
            actual = ["   this is a", "sentence", "with", "whitespace"]
            self.assertEqual(actual, str25.rsplit(maxsplit=3))

@@ -396,7 +390,7 @@ class TestStringMixIn(unittest.TestCase):
        self.assertEqual(actual, str25.split(None, 3))
        actual = ["", "", "", "this is a   sentence with  whitespace "]
        self.assertEqual(actual, str25.split(" ", 3))
        if py3k and not py32:
        if py3k:
            actual = ["this", "is", "a", "sentence with  whitespace "]
            self.assertEqual(actual, str25.split(maxsplit=3))

--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Tag, Template, Text
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -22,11 +22,7 @@

 from __future__ import unicode_literals
 from difflib import unified_diff

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import HTMLEntity, Template, Text
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Text
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import py3k
 from mwparserfromhell.parser import tokens
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.nodes import Template, Text
 from mwparserfromhell.utils import parse_anything
--- a/tests/test_wikicode.py
+++ b/tests/test_wikicode.py
@@ -24,11 +24,7 @@ from __future__ import unicode_literals
 from functools import partial
 import re
 from types import GeneratorType

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import py3k, str
 from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
--- a/tests/test_wikilink.py
+++ b/tests/test_wikilink.py
@@ -21,11 +21,7 @@
 # SOFTWARE.

 from __future__ import unicode_literals

 try:
    import unittest2 as unittest
 except ImportError:
    import unittest
 import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Text, Wikilink