diff --git a/.coveragerc b/.coveragerc index 48a64ce..909a0e2 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,4 +6,3 @@ partial_branches = pragma: no branch if py3k: if not py3k: - if py26: diff --git a/.travis.yml b/.travis.yml index c0233d9..5fc2718 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,12 @@ language: python python: - - 2.6 - 2.7 - - 3.2 - - 3.3 - 3.4 - 3.5 - 3.6 - nightly sudo: false install: - - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; then pip install coverage==3.7.1; fi - pip install coveralls - python setup.py build script: diff --git a/CHANGELOG b/CHANGELOG index 64f91db..cb6d263 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,13 @@ -v0.5.1 (released March 03, 2018): +v0.5.2 (released November 1, 2018): + +- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204) +- Fixed signals getting stuck inside the C tokenizer until parsing finishes, + in pathological cases. (#206) +- Fixed <wbr> not being considered a single-only tag. (#200) +- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (#208) +- Cleaned up some minor documentation issues. (#207) + +v0.5.1 (released March 3, 2018): - Improved behavior when adding parameters to templates (via Template.add()) with poorly formatted whitespace conventions. (#185) diff --git a/README.rst b/README.rst index ab1bef9..cf2715f 100644 --- a/README.rst +++ b/README.rst @@ -35,81 +35,81 @@ You can run the comprehensive unit testing suite with Usage ----- -Normal usage is rather straightforward (where ``text`` is page text):: +Normal usage is rather straightforward (where ``text`` is page text): - >>> import mwparserfromhell - >>> wikicode = mwparserfromhell.parse(text) +>>> import mwparserfromhell +>>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods. -For example:: - - >>> text = "I has a template! 
{{foo|bar|baz|eggs=spam}} See it?" - >>> wikicode = mwparserfromhell.parse(text) - >>> print(wikicode) - I has a template! {{foo|bar|baz|eggs=spam}} See it? - >>> templates = wikicode.filter_templates() - >>> print(templates) - ['{{foo|bar|baz|eggs=spam}}'] - >>> template = templates[0] - >>> print(template.name) - foo - >>> print(template.params) - ['bar', 'baz', 'eggs=spam'] - >>> print(template.get(1).value) - bar - >>> print(template.get("eggs").value) - spam - -Since nodes can contain other nodes, getting nested templates is trivial:: - - >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" - >>> mwparserfromhell.parse(text).filter_templates() - ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] +For example: + +>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" +>>> wikicode = mwparserfromhell.parse(text) +>>> print(wikicode) +I has a template! {{foo|bar|baz|eggs=spam}} See it? +>>> templates = wikicode.filter_templates() +>>> print(templates) +['{{foo|bar|baz|eggs=spam}}'] +>>> template = templates[0] +>>> print(template.name) +foo +>>> print(template.params) +['bar', 'baz', 'eggs=spam'] +>>> print(template.get(1).value) +bar +>>> print(template.get("eggs").value) +spam + +Since nodes can contain other nodes, getting nested templates is trivial: + +>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" +>>> mwparserfromhell.parse(text).filter_templates() +['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] You can also pass ``recursive=False`` to ``filter_templates()`` and explore templates manually. 
This is possible because nodes can contain additional -``Wikicode`` objects:: - - >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") - >>> print(code.filter_templates(recursive=False)) - ['{{foo|this {{includes a|template}}}}'] - >>> foo = code.filter_templates(recursive=False)[0] - >>> print(foo.get(1).value) - this {{includes a|template}} - >>> print(foo.get(1).value.filter_templates()[0]) - {{includes a|template}} - >>> print(foo.get(1).value.filter_templates()[0].get(1).value) - template +``Wikicode`` objects: + +>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") +>>> print(code.filter_templates(recursive=False)) +['{{foo|this {{includes a|template}}}}'] +>>> foo = code.filter_templates(recursive=False)[0] +>>> print(foo.get(1).value) +this {{includes a|template}} +>>> print(foo.get(1).value.filter_templates()[0]) +{{includes a|template}} +>>> print(foo.get(1).value.filter_templates()[0].get(1).value) +template Templates can be easily modified to add, remove, or alter params. ``Wikicode`` objects can be treated like lists, with ``append()``, ``insert()``, ``remove()``, ``replace()``, and more. They also have a ``matches()`` method for comparing page or template names, which takes care of capitalization and -whitespace:: - - >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" - >>> code = mwparserfromhell.parse(text) - >>> for template in code.filter_templates(): - ... if template.name.matches("Cleanup") and not template.has("date"): - ... template.add("date", "July 2012") - ... - >>> print(code) - {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} - >>> code.replace("{{uncategorized}}", "{{bar-stub}}") - >>> print(code) - {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} - >>> print(code.filter_templates()) - ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] +whitespace: + +>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. 
{{uncategorized}}" +>>> code = mwparserfromhell.parse(text) +>>> for template in code.filter_templates(): +... if template.name.matches("Cleanup") and not template.has("date"): +... template.add("date", "July 2012") +... +>>> print(code) +{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} +>>> code.replace("{{uncategorized}}", "{{bar-stub}}") +>>> print(code) +{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} +>>> print(code.filter_templates()) +['{{cleanup|date=July 2012}}', '{{bar-stub}}'] You can then convert ``code`` back into a regular ``str`` object (for -saving the page!) by calling ``str()`` on it:: +saving the page!) by calling ``str()`` on it: - >>> text = str(code) - >>> print(text) - {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} - >>> text == code - True +>>> text = str(code) +>>> print(text) +{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} +>>> text == code +True Likewise, use ``unicode(code)`` in Python 2. @@ -164,7 +164,9 @@ Integration ``Page`` objects have a ``parse`` method that essentially calls ``mwparserfromhell.parse()`` on ``page.get()``. -If you're using Pywikibot_, your code might look like this:: +If you're using Pywikibot_, your code might look like this: + +.. code-block:: python import mwparserfromhell import pywikibot @@ -176,7 +178,9 @@ If you're using Pywikibot_, your code might look like this:: return mwparserfromhell.parse(text) If you're not using a library, you can parse any page using the following -Python 3 code (via the API_):: +Python 3 code (via the API_): + +.. 
code-block:: python import json from urllib.parse import urlencode @@ -189,11 +193,11 @@ Python 3 code (via the API_):: "rvprop": "content", "format": "json", "titles": title} raw = urlopen(API_URL, urlencode(data).encode()).read() res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) .. _MediaWiki: http://mediawiki.org -.. _ReadTheDocs: http://mwparserfromhell.readthedocs.org +.. _ReadTheDocs: http://mwparserfromhell.readthedocs.io .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig .. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 .. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm diff --git a/appveyor.yml b/appveyor.yml index e99f54e..df48d7a 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ # This config file is used by appveyor.com to build Windows release binaries -version: 0.5.1-b{build} +version: 0.5.2-b{build} branches: only: @@ -13,8 +13,9 @@ environment: global: # See: http://stackoverflow.com/a/13751649/163740 WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd" - PIP: "%WRAPPER% %PYTHON%\\Scripts\\pip.exe" - SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension" + PIP: "%WRAPPER% %PYTHON%\\python.exe -m pip" + SETUPPY: "%WRAPPER% %PYTHON%\\python.exe setup.py --with-extension" + TWINE: "%WRAPPER% %PYTHON%\\python.exe -m twine" PYPI_USERNAME: "earwigbot" PYPI_PASSWORD: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ @@ -28,14 +29,6 @@ environment: PYTHON_VERSION: "2.7" PYTHON_ARCH: "64" - - PYTHON: "C:\\Python33" - PYTHON_VERSION: "3.3" - PYTHON_ARCH: "32" - - - PYTHON: "C:\\Python33-x64" - PYTHON_VERSION: "3.3" - PYTHON_ARCH: "64" - - PYTHON: "C:\\Python34" PYTHON_VERSION: "3.4" PYTHON_ARCH: "32" @@ -60,6 +53,14 @@ environment: PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" + - PYTHON: "C:\\Python37" + PYTHON_VERSION: "3.7" + PYTHON_ARCH: "32" + + - PYTHON: 
"C:\\Python37-x64" + PYTHON_VERSION: "3.7" + PYTHON_ARCH: "64" + install: - "%PIP% install --disable-pip-version-check --user --upgrade pip" - "%PIP% install wheel twine" @@ -74,7 +75,7 @@ after_test: - "%SETUPPY% bdist_wheel" on_success: - - "IF %APPVEYOR_REPO_BRANCH%==master %WRAPPER% %PYTHON%\\python -m twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" + - "IF %APPVEYOR_REPO_BRANCH%==master %TWINE% upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" artifacts: - path: dist\* diff --git a/docs/changelog.rst b/docs/changelog.rst index 4e637d2..4092e25 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,10 +1,29 @@ Changelog ========= +v0.5.2 +------ + +`Released November 1, 2018 `_ +(`changes `__): + +- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. + (`#199 `, + `#204 `) +- Fixed signals getting stuck inside the C tokenizer until parsing finishes, + in pathological cases. + (`#206 `) +- Fixed `` not being considered a single-only tag. + (`#200 `) +- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. + (`#208 `) +- Cleaned up some minor documentation issues. + (`#207 `) + v0.5.1 ------ -`Released March 03, 2018 `_ +`Released March 3, 2018 `_ (`changes `__): - Improved behavior when adding parameters to templates (via diff --git a/docs/integration.rst b/docs/integration.rst index af3abc9..c1c1f39 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -32,7 +32,7 @@ If you're not using a library, you can parse any page using the following code "rvprop": "content", "format": "json", "titles": title} raw = urlopen(API_URL, urlencode(data).encode()).read() res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) .. 
_EarwigBot: https://github.com/earwig/earwigbot diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index ab8514a..a6de13a 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.5.1" +__version__ = "0.5.2" __email__ = "ben.kurtovic@gmail.com" from . import (compat, definitions, nodes, parser, smart_list, string_mixin, diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 7a83cd1..85f9d48 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -10,9 +10,7 @@ meant to be imported directly from within the parser's modules. import sys -py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6) py3k = (sys.version_info[0] == 3) -py32 = py3k and (sys.version_info[1] == 2) if py3k: bytes = bytes diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 18a06cc..4399970 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -56,8 +56,8 @@ INVISIBLE_TAGS = [ "section", "templatedata", "timeline" ] -# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 -SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] +# [mediawiki/core.git]/includes/Sanitizer.php @ 065bec63ea +SINGLE_ONLY = ["br", "hr", "meta", "link", "img", "wbr"] SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] MARKUP_TO_HTML = { diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 0f55a6b..59473c4 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -40,7 +40,7 @@ class Attribute(StringMixIn): pad_before_eq="", pad_after_eq="", check_quotes=True): super(Attribute, self).__init__() if check_quotes and not quotes and self._value_needs_quotes(value): - 
raise ValueError("given value {0!r} requires quotes".format(value)) + raise ValueError("given value {!r} requires quotes".format(value)) self._name = name self._value = value self._quotes = quotes @@ -79,7 +79,7 @@ class Attribute(StringMixIn): """Coerce a quote type into an acceptable value, or raise an error.""" orig, quotes = quotes, str(quotes) if quotes else None if quotes not in [None, '"', "'"]: - raise ValueError("{0!r} is not a valid quote type".format(orig)) + raise ValueError("{!r} is not a valid quote type".format(orig)) return quotes @property diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 0d21d5b..dff8492 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -41,7 +41,7 @@ class Parameter(StringMixIn): def __init__(self, name, value, showkey=True): super(Parameter, self).__init__() if not showkey and not self.can_hide_key(name): - raise ValueError("key {0!r} cannot be hidden".format(name)) + raise ValueError("key {!r} cannot be hidden".format(name)) self._name = name self._value = value self._showkey = showkey diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index d5e9d73..ea534e9 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -53,10 +53,10 @@ class HTMLEntity(Node): def __unicode__(self): if self.named: - return "&{0};".format(self.value) + return "&{};".format(self.value) if self.hexadecimal: - return "&#{0}{1};".format(self.hex_char, self.value) - return "&#{0};".format(self.value) + return "&#{}{};".format(self.hex_char, self.value) + return "&#{};".format(self.value) def __strip__(self, **kwargs): if kwargs.get("normalize"): diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index f39fdc4..de60226 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -35,7 +35,7 @@ 
class ParserError(Exception): can happen. Its appearance indicates a bug. """ def __init__(self, extra): - msg = "This is a bug and should be reported. Info: {0}.".format(extra) + msg = "This is a bug and should be reported. Info: {}.".format(extra) super(ParserError, self).__init__(msg) diff --git a/mwparserfromhell/parser/ctokenizer/definitions.c b/mwparserfromhell/parser/ctokenizer/definitions.c index 38482a4..e247234 100644 --- a/mwparserfromhell/parser/ctokenizer/definitions.c +++ b/mwparserfromhell/parser/ctokenizer/definitions.c @@ -45,11 +45,12 @@ static const char* PARSER_BLACKLIST[] = { }; static const char* SINGLE[] = { - "br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL + "br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", + "wbr", NULL }; static const char* SINGLE_ONLY[] = { - "br", "hr", "meta", "link", "img", NULL + "br", "hr", "meta", "link", "img", "wbr", NULL }; /* diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index 1998368..3a2cda9 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -2603,6 +2603,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) } if (!this) return Tokenizer_handle_end(self, this_context); + if (PyErr_CheckSignals()) + return NULL; next = Tokenizer_read(self, 1); last = Tokenizer_read_backwards(self, 1); if (this == next && next == '{') { diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.c b/mwparserfromhell/parser/ctokenizer/tokenizer.c index 9017909..24d0b4a 100644 --- a/mwparserfromhell/parser/ctokenizer/tokenizer.c +++ b/mwparserfromhell/parser/ctokenizer/tokenizer.c @@ -207,7 +207,7 @@ static int load_entities(void) if (!deflist) return -1; Py_DECREF(defmap); - numdefs = (unsigned) PyList_GET_SIZE(defmap); + numdefs = (unsigned) PyList_GET_SIZE(deflist); entitydefs = calloc(numdefs + 1, sizeof(char*)); if (!entitydefs) 
return -1; diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 036dc9b..3110179 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -44,7 +44,7 @@ class Token(dict): args.append(key + "=" + repr(value[:97] + "...")) else: args.append(key + "=" + repr(value)) - return "{0}({1})".format(type(self).__name__, ", ".join(args)) + return "{}({})".format(type(self).__name__, ", ".join(args)) def __eq__(self, other): return isinstance(other, type(self)) and dict.__eq__(self, other) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 88898a1..3664a09 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -28,7 +28,7 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. from __future__ import unicode_literals from sys import getdefaultencoding -from .compat import bytes, py26, py3k, str +from .compat import bytes, py3k, str __all__ = ["StringMixIn"] @@ -109,21 +109,12 @@ class StringMixIn(object): def __getattr__(self, attr): if not hasattr(str, attr): - raise AttributeError("{0!r} object has no attribute {1!r}".format( + raise AttributeError("{!r} object has no attribute {!r}".format( type(self).__name__, attr)) return getattr(self.__unicode__(), attr) if py3k: maketrans = str.maketrans # Static method can't rely on __getattr__ - if py26: - @inheritdoc - def encode(self, encoding=None, errors=None): - if encoding is None: - encoding = getdefaultencoding() - if errors is not None: - return self.__unicode__().encode(encoding, errors) - return self.__unicode__().encode(encoding) - del inheritdoc diff --git a/scripts/memtest.py b/scripts/memtest.py index 823560d..64e8c6b 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -80,7 +80,7 @@ class MemoryTest(object): raw = raw.encode("raw_unicode_escape") data["input"] = raw.decode("unicode_escape") number = str(counter).zfill(digits) - fname = 
"test_{0}{1}_{2}".format(name, number, data["name"]) + fname = "test_{}{}_{}".format(name, number, data["name"]) self._tests.append((fname, data["input"])) counter += 1 @@ -117,7 +117,7 @@ class MemoryTest(object): tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)" sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt)) else: - sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET)) + sys.stdout.write("{}OK{}".format(Color.GREEN, Color.RESET)) def run(self): """Run the memory test suite.""" diff --git a/scripts/release.sh b/scripts/release.sh index 0d31e15..5dbefbe 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -9,7 +9,7 @@ fi VERSION=$1 SCRIPT_DIR=$(dirname "$0") -RELEASE_DATE=$(date +"%B %d, %Y") +RELEASE_DATE=$(date +"%B %-d, %Y") check_git() { if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then @@ -76,9 +76,8 @@ do_git_stuff() { } upload_to_pypi() { - echo -n "PyPI: uploading source tarball and docs..." + echo -n "PyPI: uploading source tarball..." python setup.py -q register sdist upload -s - python setup.py -q upload_docs echo " done." } @@ -88,7 +87,7 @@ post_release() { echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell" - echo "*** Verify: https://mwparserfromhell.readthedocs.org" + echo "*** Verify: https://mwparserfromhell.readthedocs.io" echo "*** Press enter to sanity-check the release." 
read } diff --git a/setup.py b/setup.py index 0b33d42..8f84eb5 100644 --- a/setup.py +++ b/setup.py @@ -27,15 +27,15 @@ from glob import glob from os import environ import sys -if ((sys.version_info[0] == 2 and sys.version_info[1] < 6) or - (sys.version_info[1] == 3 and sys.version_info[1] < 2)): - raise RuntimeError("mwparserfromhell needs Python 2.6+ or 3.2+") +if ((sys.version_info[0] == 2 and sys.version_info[1] < 7) or + (sys.version_info[1] == 3 and sys.version_info[1] < 4)): + raise RuntimeError("mwparserfromhell needs Python 2.7 or 3.4+") from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext from mwparserfromhell import __version__ -from mwparserfromhell.compat import py26, py3k +from mwparserfromhell.compat import py3k with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: long_docs = fp.read() @@ -76,21 +76,20 @@ if fallback: tokenizer = Extension("mwparserfromhell.parser._tokenizer", sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")), - depends=glob("mwparserfromhell/parser/ctokenizer/*.h")) + depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h"))) setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), ext_modules = [tokenizer] if use_extension else [], - tests_require = ["unittest2"] if py26 else [], - test_suite = "tests.discover", + test_suite = "tests", version = __version__, author = "Ben Kurtovic", author_email = "ben.kurtovic@gmail.com", url = "https://github.com/earwig/mwparserfromhell", description = "MWParserFromHell is a parser for MediaWiki wikicode.", long_description = long_docs, - download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__), + download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__), keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", license = "MIT License", classifiers = [ @@ -99,11 +98,9 @@ setup( 
"Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index d025625..c314df8 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -109,7 +109,7 @@ class TokenizerTestCase(object): print(error.format(filename)) continue if data["input"] is None or data["output"] is None: - error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" + error = "Test '{}' in '{}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue @@ -118,7 +118,7 @@ class TokenizerTestCase(object): if restrict and data["name"] != restrict: continue - fname = "test_{0}{1}_{2}".format(name, number, data["name"]) + fname = "test_{}{}_{}".format(name, number, data["name"]) meth = cls._build_test_method(fname, data) setattr(cls, fname, meth) @@ -126,7 +126,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" def load_file(filename, restrict=None): - with codecs.open(filename, "rU", encoding="utf8") as fp: + with codecs.open(filename, "r", encoding="utf8") as fp: text = fp.read() name = path.split(filename)[1][:-len(extension)] cls._load_tests(filename, name, text, restrict) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index fe626ce..3c9aa0e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - from unittest2 import TestCase -except ImportError: - from unittest import TestCase +from unittest import TestCase from mwparserfromhell.compat import range from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, diff --git a/tests/discover.py b/tests/discover.py deleted file mode 100644 index 6bb971b..0000000 --- a/tests/discover.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Discover tests using ``unittest2` for Python 2.6. - -It appears the default distutils test suite doesn't play nice with -``setUpClass`` thereby making some tests fail. Using ``unittest2`` to load -tests seems to work around that issue. - -http://stackoverflow.com/a/17004409/753501 -""" - -import os.path - -from mwparserfromhell.compat import py26 - -if py26: - import unittest2 as unittest -else: - import unittest - -def additional_tests(): - project_root = os.path.split(os.path.dirname(__file__))[0] - return unittest.defaultTestLoader.discover(project_root) diff --git a/tests/test_argument.py b/tests/test_argument.py index 6209b2f..16b4d0c 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text diff --git a/tests/test_attribute.py b/tests/test_attribute.py index 7fe5772..a36f59f 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Template diff --git a/tests/test_builder.py b/tests/test_builder.py index eed5861..67e0043 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import py3k from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, diff --git a/tests/test_comment.py b/tests/test_comment.py index 27129c9..cf2f14d 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Comment diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 27ff237..3552a02 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest try: from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 398be4c..bc4da1d 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,11 +23,7 @@ from __future__ import print_function, unicode_literals import json import os - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest import mwparserfromhell from mwparserfromhell.compat import py3k, str diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 8cb3158..c70905a 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text diff --git a/tests/test_heading.py b/tests/test_heading.py index 5e6776a..e5ec470 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 4db1c13..fc09fde 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 44c30af..be09448 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text diff --git a/tests/test_parser.py b/tests/test_parser.py index d586ecd..5b12a0e 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell import parser from mwparserfromhell.compat import range diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index f7f26b8..85a55b9 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/test_roundtripping.py b/tests/test_roundtripping.py index a217e21..50f9c1f 100644 --- a/tests/test_roundtripping.py +++ b/tests/test_roundtripping.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from ._test_tokenizer import TokenizerTestCase diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 3de7db7..3c9f711 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList, _ListProxy @@ -139,36 +135,36 @@ class TestSmartList(unittest.TestCase): self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) - self.assertTrue(list1 < list3) - self.assertTrue(list1 <= list3) - self.assertFalse(list1 == list3) - self.assertTrue(list1 != list3) - self.assertFalse(list1 > list3) - self.assertFalse(list1 >= list3) + self.assertLess(list1, list3) + self.assertLessEqual(list1, list3) + self.assertNotEqual(list1, list3) + self.assertNotEqual(list1, list3) + self.assertLessEqual(list1, list3) + self.assertLess(list1, list3) other1 = [0, 2, 3, 4] - self.assertTrue(list1 < other1) - self.assertTrue(list1 <= other1) - self.assertFalse(list1 == other1) - self.assertTrue(list1 != other1) - self.assertFalse(list1 > other1) - self.assertFalse(list1 >= other1) + self.assertLess(list1, other1) + self.assertLessEqual(list1, other1) + self.assertNotEqual(list1, other1) + self.assertNotEqual(list1, other1) + self.assertLessEqual(list1, other1) + self.assertLess(list1, other1) other2 = [0, 0, 1, 2] - self.assertFalse(list1 < other2) - self.assertFalse(list1 <= other2) - self.assertFalse(list1 == other2) - self.assertTrue(list1 != other2) - self.assertTrue(list1 > other2) - self.assertTrue(list1 >= other2) + self.assertGreaterEqual(list1, other2) + self.assertGreater(list1, other2) + self.assertNotEqual(list1, other2) + self.assertNotEqual(list1, other2) + self.assertGreater(list1, other2) + self.assertGreaterEqual(list1, other2) other3 = [0, 1, 2, 3, "one", "two"] - self.assertFalse(list1 < other3) - self.assertTrue(list1 <= other3) - self.assertTrue(list1 == other3) - self.assertFalse(list1 != other3) - self.assertFalse(list1 > other3) - self.assertTrue(list1 >= 
other3) + self.assertGreaterEqual(list1, other3) + self.assertLessEqual(list1, other3) + self.assertEqual(list1, other3) + self.assertEqual(list1, other3) + self.assertLessEqual(list1, other3) + self.assertGreaterEqual(list1, other3) self.assertTrue(bool(list1)) self.assertFalse(bool(list2)) @@ -198,10 +194,10 @@ class TestSmartList(unittest.TestCase): self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) self.assertEqual([], list(reversed(list2))) - self.assertTrue("one" in list1) - self.assertTrue(3 in list1) - self.assertFalse(10 in list1) - self.assertFalse(0 in list2) + self.assertIn("one", list1) + self.assertIn(3, list1) + self.assertNotIn(10, list1) + self.assertNotIn(0, list2) self.assertEqual([], list2 * 5) self.assertEqual([], 5 * list2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 08d5b9e..11ee6b7 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -23,13 +23,9 @@ from __future__ import unicode_literals from sys import getdefaultencoding from types import GeneratorType +import unittest -try: - import unittest2 as unittest -except ImportError: - import unittest - -from mwparserfromhell.compat import bytes, py3k, py32, range, str +from mwparserfromhell.compat import bytes, py3k, range, str from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): @@ -54,9 +50,7 @@ class TestStringMixIn(unittest.TestCase): "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", "title", "translate", "upper", "zfill"] if py3k: - if not py32: - methods.append("casefold") - methods.extend(["format_map", "isidentifier", "isprintable", + methods.extend(["casefold", "format_map", "isidentifier", "isprintable", "maketrans"]) else: methods.append("decode") @@ -90,33 +84,33 @@ class TestStringMixIn(unittest.TestCase): str4 = "this is a fake string" str5 = "fake string, this is" - self.assertFalse(str1 > str2) - self.assertTrue(str1 >= str2) - self.assertTrue(str1 == 
str2) - self.assertFalse(str1 != str2) - self.assertFalse(str1 < str2) - self.assertTrue(str1 <= str2) - - self.assertTrue(str1 > str3) - self.assertTrue(str1 >= str3) - self.assertFalse(str1 == str3) - self.assertTrue(str1 != str3) - self.assertFalse(str1 < str3) - self.assertFalse(str1 <= str3) - - self.assertFalse(str1 > str4) - self.assertTrue(str1 >= str4) - self.assertTrue(str1 == str4) - self.assertFalse(str1 != str4) - self.assertFalse(str1 < str4) - self.assertTrue(str1 <= str4) - - self.assertFalse(str5 > str1) - self.assertFalse(str5 >= str1) - self.assertFalse(str5 == str1) - self.assertTrue(str5 != str1) - self.assertTrue(str5 < str1) - self.assertTrue(str5 <= str1) + self.assertLessEqual(str1, str2) + self.assertGreaterEqual(str1, str2) + self.assertEqual(str1, str2) + self.assertEqual(str1, str2) + self.assertGreaterEqual(str1, str2) + self.assertLessEqual(str1, str2) + + self.assertGreater(str1, str3) + self.assertGreaterEqual(str1, str3) + self.assertNotEqual(str1, str3) + self.assertNotEqual(str1, str3) + self.assertGreaterEqual(str1, str3) + self.assertGreater(str1, str3) + + self.assertLessEqual(str1, str4) + self.assertGreaterEqual(str1, str4) + self.assertEqual(str1, str4) + self.assertEqual(str1, str4) + self.assertGreaterEqual(str1, str4) + self.assertLessEqual(str1, str4) + + self.assertLessEqual(str5, str1) + self.assertLess(str5, str1) + self.assertNotEqual(str5, str1) + self.assertNotEqual(str5, str1) + self.assertLess(str5, str1) + self.assertLessEqual(str5, str1) def test_other_magics(self): """test other magically implemented features, like len() and iter()""" @@ -161,13 +155,13 @@ class TestStringMixIn(unittest.TestCase): self.assertRaises(IndexError, lambda: str1[11]) self.assertRaises(IndexError, lambda: str2[0]) - self.assertTrue("k" in str1) - self.assertTrue("fake" in str1) - self.assertTrue("str" in str1) - self.assertTrue("" in str1) - self.assertTrue("" in str2) - self.assertFalse("real" in str1) - self.assertFalse("s" in 
str2) + self.assertIn("k", str1) + self.assertIn("fake", str1) + self.assertIn("str", str1) + self.assertIn("", str1) + self.assertIn("", str2) + self.assertNotIn("real", str1) + self.assertNotIn("s", str2) def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" @@ -329,7 +323,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str15.lower()) self.assertEqual("foobar", str16.lower()) self.assertEqual("ß", str22.lower()) - if py3k and not py32: + if py3k: self.assertEqual("", str15.casefold()) self.assertEqual("foobar", str16.casefold()) self.assertEqual("ss", str22.casefold()) @@ -378,7 +372,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) - if py3k and not py32: + if py3k: actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) @@ -396,7 +390,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) - if py3k and not py32: + if py3k: actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) diff --git a/tests/test_tag.py b/tests/test_tag.py index 2e6d8a3..c8c9808 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text diff --git a/tests/test_template.py b/tests/test_template.py index 5b939f0..e03a564 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -22,11 +22,7 @@ from __future__ import unicode_literals from difflib import unified_diff - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text diff --git a/tests/test_text.py b/tests/test_text.py index aaf8db2..4464418 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text diff --git a/tests/test_tokens.py b/tests/test_tokens.py index b33c2f1..e766002 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens diff --git a/tests/test_utils.py b/tests/test_utils.py index 342cfd7..b79b544 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,11 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.nodes import Template, Text from mwparserfromhell.utils import parse_anything diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index c77fdd2..12a1761 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -24,11 +24,7 @@ from __future__ import unicode_literals from functools import partial import re from types import GeneratorType - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import py3k, str from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 80116ca..487b7af 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -21,11 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals - -try: - import unittest2 as unittest -except ImportError: - import unittest +import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink