From 7e5297fbe6fde6b9661fd6703cd8b6e711292a58 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Sun, 5 Jan 2020 21:56:06 -0800 Subject: [PATCH 1/2] Drop Python 2 support Fixes #221. --- .travis.yml | 1 - README.rst | 6 +- appveyor.yml | 8 -- docs/index.rst | 2 +- docs/usage.rst | 4 +- mwparserfromhell/__init__.py | 3 +- mwparserfromhell/compat.py | 27 ------ mwparserfromhell/definitions.py | 2 - mwparserfromhell/nodes/__init__.py | 3 - mwparserfromhell/nodes/argument.py | 5 +- mwparserfromhell/nodes/comment.py | 5 +- mwparserfromhell/nodes/external_link.py | 5 +- mwparserfromhell/nodes/extras/__init__.py | 1 - mwparserfromhell/nodes/extras/attribute.py | 5 +- mwparserfromhell/nodes/extras/parameter.py | 5 +- mwparserfromhell/nodes/heading.py | 5 +- mwparserfromhell/nodes/html_entity.py | 39 ++------- mwparserfromhell/nodes/tag.py | 5 +- mwparserfromhell/nodes/template.py | 5 +- mwparserfromhell/nodes/text.py | 5 +- mwparserfromhell/nodes/wikilink.py | 5 +- mwparserfromhell/parser/__init__.py | 5 +- mwparserfromhell/parser/builder.py | 5 +- mwparserfromhell/parser/contexts.py | 1 - mwparserfromhell/parser/ctokenizer/common.h | 26 +----- mwparserfromhell/parser/ctokenizer/tag_data.h | 2 +- mwparserfromhell/parser/ctokenizer/textbuffer.c | 55 ++---------- mwparserfromhell/parser/ctokenizer/textbuffer.h | 4 +- mwparserfromhell/parser/ctokenizer/tok_parse.c | 56 ++++++------ mwparserfromhell/parser/ctokenizer/tok_parse.h | 2 +- mwparserfromhell/parser/ctokenizer/tok_support.c | 12 +-- mwparserfromhell/parser/ctokenizer/tok_support.h | 6 +- mwparserfromhell/parser/ctokenizer/tokenizer.c | 34 ++----- mwparserfromhell/parser/ctokenizer/tokenizer.h | 18 ---- mwparserfromhell/parser/tokenizer.py | 10 +-- mwparserfromhell/parser/tokens.py | 6 +- mwparserfromhell/smart_list/ListProxy.py | 56 +++--------- mwparserfromhell/smart_list/SmartList.py | 66 ++++---------- mwparserfromhell/smart_list/__init__.py | 1 - mwparserfromhell/smart_list/utils.py | 6 +- 
mwparserfromhell/string_mixin.py | 36 +++----- mwparserfromhell/utils.py | 3 - mwparserfromhell/wikicode.py | 11 +-- scripts/memtest.py | 3 - setup.py | 12 +-- tests/__init__.py | 1 - tests/_test_tokenizer.py | 7 +- tests/_test_tree_equality.py | 3 - tests/compat.py | 18 ---- tests/test_argument.py | 3 - tests/test_attribute.py | 3 - tests/test_builder.py | 6 +- tests/test_comment.py | 3 - tests/test_ctokenizer.py | 2 - tests/test_docs.py | 40 +++------ tests/test_external_link.py | 3 - tests/test_heading.py | 3 - tests/test_html_entity.py | 3 - tests/test_parameter.py | 3 - tests/test_parser.py | 3 - tests/test_pytokenizer.py | 2 - tests/test_roundtripping.py | 2 - tests/test_smart_list.py | 21 +---- tests/test_string_mixin.py | 107 +++++++++-------------- tests/test_tag.py | 3 - tests/test_template.py | 3 - tests/test_text.py | 3 - tests/test_tokens.py | 14 +-- tests/test_utils.py | 2 - tests/test_wikicode.py | 3 - tests/test_wikilink.py | 3 - 71 files changed, 189 insertions(+), 657 deletions(-) delete mode 100644 mwparserfromhell/compat.py delete mode 100644 tests/compat.py diff --git a/.travis.yml b/.travis.yml index 0ecf3fe..bee8152 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ dist: xenial language: python python: - - 2.7 - 3.4 - 3.5 - 3.6 diff --git a/README.rst b/README.rst index a94f4e2..98af7a4 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ mwparserfromhell **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 2 and Python 3. +wikicode. It supports Python 3.4+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. 
@@ -41,7 +41,7 @@ Normal usage is rather straightforward (where ``text`` is page text): >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an -ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods. +ordinary ``str`` object with some extra methods. For example: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" @@ -111,8 +111,6 @@ saving the page!) by calling ``str()`` on it: >>> text == code True -Likewise, use ``unicode(code)`` in Python 2. - Limitations ----------- diff --git a/appveyor.yml b/appveyor.yml index 20f9e35..2a4de47 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,14 +22,6 @@ environment: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ matrix: - - PYTHON: "C:\\Python27" - PYTHON_VERSION: "2.7" - PYTHON_ARCH: "32" - - - PYTHON: "C:\\Python27-x64" - PYTHON_VERSION: "2.7" - PYTHON_ARCH: "64" - - PYTHON: "C:\\Python34" PYTHON_VERSION: "3.4" PYTHON_ARCH: "32" diff --git a/docs/index.rst b/docs/index.rst index 8a1621f..1ca69f6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ MWParserFromHell v\ |version| Documentation :mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 2 and Python 3. +wikicode. It supports Python 3.4+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Development occurs on GitHub_. diff --git a/docs/usage.rst b/docs/usage.rst index ee667fd..2cdc690 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -7,8 +7,7 @@ Normal usage is rather straightforward (where ``text`` is page text):: >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which -acts like an ordinary ``str`` object (or ``unicode`` in Python 2) with some -extra methods. 
For example:: +acts like an ordinary ``str`` object with some extra methods. For example:: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) @@ -78,7 +77,6 @@ saving the page!) by calling :func:`str` on it:: >>> text == code True -(Likewise, use :func:`unicode(code) ` in Python 2.) For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and the :mod:`list of Nodes <.nodes>`. diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index f867e26..6056b83 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -32,7 +31,7 @@ __license__ = "MIT License" __version__ = "0.6.dev0" __email__ = "ben.kurtovic@gmail.com" -from . import (compat, definitions, nodes, parser, smart_list, string_mixin, +from . import (definitions, nodes, parser, smart_list, string_mixin, utils, wikicode) parse = utils.parse_anything diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py deleted file mode 100644 index 85f9d48..0000000 --- a/mwparserfromhell/compat.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :class:`str` is set to -:class:`unicode` on Python 2 but :class:`str` on Python 3; likewise, -:class:`bytes` is :class:`str` on 2 but :class:`bytes` on 3. These types are -meant to be imported directly from within the parser's modules. 
-""" - -import sys - -py3k = (sys.version_info[0] == 3) - -if py3k: - bytes = bytes - str = str - range = range - import html.entities as htmlentities - -else: - bytes = str - str = unicode - range = xrange - import htmlentitydefs as htmlentities - -del sys diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 4399970..6191dc6 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -28,7 +27,6 @@ When updating this file, please also update the the C tokenizer version: - mwparserfromhell/parser/ctokenizer/definitions.h """ -from __future__ import unicode_literals __all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", "is_single_only", "is_scheme"] diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 1e38254..6aa6ea4 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -29,9 +28,7 @@ the name of a :class:`.Template` is a :class:`.Wikicode` object that can contain text or more templates. """ -from __future__ import unicode_literals -from ..compat import str from ..string_mixin import StringMixIn __all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 2da1467..4d9d613 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . 
import Node -from ..compat import str from ..utils import parse_anything __all__ = ["Argument"] @@ -32,7 +29,7 @@ class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" def __init__(self, name, default=None): - super(Argument, self).__init__() + super().__init__() self.name = name self.default = default diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 40224ba..302699e 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node -from ..compat import str __all__ = ["Comment"] @@ -31,7 +28,7 @@ class Comment(Node): """Represents a hidden HTML comment, like ````.""" def __init__(self, contents): - super(Comment, self).__init__() + super().__init__() self.contents = contents def __unicode__(self): diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 22b2ef7..4dc3594 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . 
import Node -from ..compat import str from ..utils import parse_anything __all__ = ["ExternalLink"] @@ -32,7 +29,7 @@ class ExternalLink(Node): """Represents an external link, like ``[http://example.com/ Example]``.""" def __init__(self, url, title=None, brackets=True): - super(ExternalLink, self).__init__() + super().__init__() self.url = url self.title = title self.brackets = brackets diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 9347c61..43fe862 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 4312199..38d2423 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,9 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals -from ...compat import str from ...string_mixin import StringMixIn from ...utils import parse_anything @@ -38,7 +35,7 @@ class Attribute(StringMixIn): def __init__(self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq=""): - super(Attribute, self).__init__() + super().__init__() self.name = name self._quotes = None self.value = value diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index fb0aac0..4478084 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import re -from ...compat import str from ...string_mixin import StringMixIn from ...utils import parse_anything @@ -39,7 +36,7 @@ class Parameter(StringMixIn): """ def __init__(self, name, value, showkey=True): - super(Parameter, self).__init__() + super().__init__() self.name = name self.value = value self.showkey = showkey diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 426e742..1fe8790 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node -from ..compat import str from ..utils import parse_anything __all__ = ["Heading"] @@ -32,7 +29,7 @@ class Heading(Node): """Represents a section heading in wikicode, like ``== Foo ==``.""" def __init__(self, title, level): - super(Heading, self).__init__() + super().__init__() self.title = title self.level = level diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index ea534e9..8a2eef4 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals +import html.entities as htmlentities from . 
import Node -from ..compat import htmlentities, py3k, str __all__ = ["HTMLEntity"] @@ -31,7 +29,7 @@ class HTMLEntity(Node): """Represents an HTML entity, like `` ``, either named or unnamed.""" def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): - super(HTMLEntity, self).__init__() + super().__init__() self._value = value if named is None: # Try to guess whether or not the entity is named try: @@ -63,32 +61,6 @@ class HTMLEntity(Node): return self.normalize() return self - if not py3k: - @staticmethod - def _unichr(value): - """Implement builtin unichr() with support for non-BMP code points. - - On wide Python builds, this functions like the normal unichr(). On - narrow builds, this returns the value's encoded surrogate pair. - """ - try: - return unichr(value) - except ValueError: - # Test whether we're on the wide or narrow Python build. Check - # the length of a non-BMP code point - # (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 1: # pragma: no cover - raise - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - @property def value(self): """The string value of the HTML entity.""" @@ -173,9 +145,8 @@ class HTMLEntity(Node): def normalize(self): """Return the unicode character represented by the HTML entity.""" - chrfunc = chr if py3k else HTMLEntity._unichr if self.named: - return chrfunc(htmlentities.name2codepoint[self.value]) + return chr(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return chrfunc(int(self.value, 16)) - return chrfunc(int(self.value)) + return chr(int(self.value, 16)) + return chr(int(self.value)) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 70a2876..9fa45c5 100644 --- a/mwparserfromhell/nodes/tag.py 
+++ b/mwparserfromhell/nodes/tag.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,11 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node from .extras import Attribute -from ..compat import str from ..definitions import is_visible from ..utils import parse_anything @@ -37,7 +34,7 @@ class Tag(Node): self_closing=False, invalid=False, implicit=False, padding="", closing_tag=None, wiki_style_separator=None, closing_wiki_markup=None): - super(Tag, self).__init__() + super().__init__() self.tag = tag self.contents = contents self._attrs = attrs if attrs else [] diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 11bccc4..34cb1e9 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,13 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from collections import defaultdict import re from . import HTMLEntity, Node, Text from .extras import Parameter -from ..compat import range, str from ..utils import parse_anything __all__ = ["Template"] @@ -37,7 +34,7 @@ class Template(Node): """Represents a template in wikicode, like ``{{foo}}``.""" def __init__(self, name, params=None): - super(Template, self).__init__() + super().__init__() self.name = name if params: self._params = params diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 1c47c7b..b07eedc 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals from . import Node -from ..compat import str __all__ = ["Text"] @@ -31,7 +28,7 @@ class Text(Node): """Represents ordinary, unformatted text with no special properties.""" def __init__(self, value): - super(Text, self).__init__() + super().__init__() self.value = value def __unicode__(self): diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 265a100..98ae75f 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node -from ..compat import str from ..utils import parse_anything __all__ = ["Wikilink"] @@ -32,7 +29,7 @@ class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" def __init__(self, title, text=None): - super(Wikilink, self).__init__() + super().__init__() self.title = title self.text = text diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index dc769a2..fb1bf20 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -36,7 +35,7 @@ class ParserError(Exception): """ def __init__(self, extra): msg = "This is a bug and should be reported. Info: {}.".format(extra) - super(ParserError, self).__init__(msg) + super().__init__(msg) from .builder import Builder @@ -50,7 +49,7 @@ except ImportError: __all__ = ["use_c", "Parser", "ParserError"] -class Parser(object): +class Parser: """Represents a parser for wikicode. 
Actual parsing is a two-step process: first, the text is split up into a diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index f1b9689..1ae2150 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import tokens, ParserError -from ..compat import str from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from ..nodes.extras import Attribute, Parameter @@ -45,7 +42,7 @@ def _add_handler(token_type): return decorator -class Builder(object): +class Builder: """Builds a tree of nodes out of a sequence of tokens. To use, pass a list of :class:`.Token`\\ s to the :meth:`build` method. The diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index cac5250..b6d013e 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # diff --git a/mwparserfromhell/parser/ctokenizer/common.h b/mwparserfromhell/parser/ctokenizer/common.h index f3d51f4..22a6b81 100644 --- a/mwparserfromhell/parser/ctokenizer/common.h +++ b/mwparserfromhell/parser/ctokenizer/common.h @@ -23,7 +23,7 @@ SOFTWARE. #pragma once #ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html +#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html #endif #include @@ -34,10 +34,6 @@ SOFTWARE. /* Compatibility macros */ -#if PY_MAJOR_VERSION >= 3 -#define IS_PY3K -#endif - #ifndef uint64_t #define uint64_t unsigned PY_LONG_LONG #endif @@ -48,20 +44,8 @@ SOFTWARE. 
/* Unicode support macros */ -#if defined(IS_PY3K) && PY_MINOR_VERSION >= 3 -#define PEP_393 -#endif - -#ifdef PEP_393 -#define Unicode Py_UCS4 #define PyUnicode_FROM_SINGLE(chr) \ PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) -#else -#define Unicode Py_UNICODE -#define PyUnicode_FROM_SINGLE(chr) \ - PyUnicode_FromUnicode(&(chr), 1) -#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE -#endif /* Error handling macros */ @@ -85,13 +69,9 @@ extern PyObject* definitions; typedef struct { Py_ssize_t capacity; Py_ssize_t length; -#ifdef PEP_393 PyObject* object; int kind; void* data; -#else - Py_UNICODE* data; -#endif } Textbuffer; typedef struct { @@ -111,12 +91,8 @@ typedef struct Stack Stack; typedef struct { PyObject* object; /* base PyUnicodeObject object */ Py_ssize_t length; /* length of object, in code points */ -#ifdef PEP_393 int kind; /* object's kind value */ void* data; /* object's raw unicode buffer */ -#else - Py_UNICODE* buf; /* object's internal buffer */ -#endif } TokenizerInput; typedef struct avl_tree_node avl_tree; diff --git a/mwparserfromhell/parser/ctokenizer/tag_data.h b/mwparserfromhell/parser/ctokenizer/tag_data.h index c2e9303..7e8edcb 100644 --- a/mwparserfromhell/parser/ctokenizer/tag_data.h +++ b/mwparserfromhell/parser/ctokenizer/tag_data.h @@ -32,7 +32,7 @@ typedef struct { Textbuffer* pad_first; Textbuffer* pad_before_eq; Textbuffer* pad_after_eq; - Unicode quoter; + Py_UCS4 quoter; Py_ssize_t reset; } TagData; diff --git a/mwparserfromhell/parser/ctokenizer/textbuffer.c b/mwparserfromhell/parser/ctokenizer/textbuffer.c index 3fd129f..e37b7c3 100644 --- a/mwparserfromhell/parser/ctokenizer/textbuffer.c +++ b/mwparserfromhell/parser/ctokenizer/textbuffer.c @@ -29,23 +29,16 @@ SOFTWARE. /* Internal allocation function for textbuffers. 
*/ -static int internal_alloc(Textbuffer* self, Unicode maxchar) +static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) { self->capacity = INITIAL_CAPACITY; self->length = 0; -#ifdef PEP_393 self->object = PyUnicode_New(self->capacity, maxchar); if (!self->object) return -1; self->kind = PyUnicode_KIND(self->object); self->data = PyUnicode_DATA(self->object); -#else - (void) maxchar; // Unused - self->data = malloc(sizeof(Unicode) * self->capacity); - if (!self->data) - return -1; -#endif return 0; } @@ -55,11 +48,7 @@ static int internal_alloc(Textbuffer* self, Unicode maxchar) */ static void internal_dealloc(Textbuffer* self) { -#ifdef PEP_393 Py_DECREF(self->object); -#else - free(self->data); -#endif } /* @@ -67,7 +56,6 @@ static void internal_dealloc(Textbuffer* self) */ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) { -#ifdef PEP_393 PyObject *newobj; void *newdata; @@ -79,10 +67,6 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) Py_DECREF(self->object); self->object = newobj; self->data = newdata; -#else - if (!(self->data = realloc(self->data, sizeof(Unicode) * new_cap))) - return -1; -#endif self->capacity = new_cap; return 0; @@ -94,11 +78,9 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) Textbuffer* Textbuffer_new(TokenizerInput* text) { Textbuffer* self = malloc(sizeof(Textbuffer)); - Unicode maxchar = 0; + Py_UCS4 maxchar = 0; -#ifdef PEP_393 maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); -#endif if (!self) goto fail_nomem; @@ -127,11 +109,9 @@ void Textbuffer_dealloc(Textbuffer* self) */ int Textbuffer_reset(Textbuffer* self) { - Unicode maxchar = 0; + Py_UCS4 maxchar = 0; -#ifdef PEP_393 maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); -#endif internal_dealloc(self); if (internal_alloc(self, maxchar)) @@ -142,18 +122,14 @@ int Textbuffer_reset(Textbuffer* self) /* Write a Unicode codepoint to the given textbuffer. 
*/ -int Textbuffer_write(Textbuffer* self, Unicode code) +int Textbuffer_write(Textbuffer* self, Py_UCS4 code) { if (self->length >= self->capacity) { if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) return -1; } -#ifdef PEP_393 PyUnicode_WRITE(self->kind, self->data, self->length++, code); -#else - self->data[self->length++] = code; -#endif return 0; } @@ -163,13 +139,9 @@ int Textbuffer_write(Textbuffer* self, Unicode code) This function does not check for bounds. */ -Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) +Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) { -#ifdef PEP_393 return PyUnicode_READ(self->kind, self->data, index); -#else - return self->data[index]; -#endif } /* @@ -177,11 +149,7 @@ Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) */ PyObject* Textbuffer_render(Textbuffer* self) { -#ifdef PEP_393 return PyUnicode_FromKindAndData(self->kind, self->data, self->length); -#else - return PyUnicode_FromUnicode(self->data, self->length); -#endif } /* @@ -196,14 +164,9 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) return -1; } -#ifdef PEP_393 assert(self->kind == other->kind); memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, other->length * other->kind); -#else - memcpy(self->data + self->length, other->data, - other->length * sizeof(Unicode)); -#endif self->length = newlen; return 0; @@ -215,18 +178,12 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) void Textbuffer_reverse(Textbuffer* self) { Py_ssize_t i, end = self->length - 1; - Unicode tmp; + Py_UCS4 tmp; for (i = 0; i < self->length / 2; i++) { -#ifdef PEP_393 tmp = PyUnicode_READ(self->kind, self->data, i); PyUnicode_WRITE(self->kind, self->data, i, PyUnicode_READ(self->kind, self->data, end - i)); PyUnicode_WRITE(self->kind, self->data, end - i, tmp); -#else - tmp = self->data[i]; - self->data[i] = self->data[end - i]; - self->data[end - i] = tmp; -#endif } } diff --git 
a/mwparserfromhell/parser/ctokenizer/textbuffer.h b/mwparserfromhell/parser/ctokenizer/textbuffer.h index 35579fd..85b39bc 100644 --- a/mwparserfromhell/parser/ctokenizer/textbuffer.h +++ b/mwparserfromhell/parser/ctokenizer/textbuffer.h @@ -29,8 +29,8 @@ SOFTWARE. Textbuffer* Textbuffer_new(TokenizerInput*); void Textbuffer_dealloc(Textbuffer*); int Textbuffer_reset(Textbuffer*); -int Textbuffer_write(Textbuffer*, Unicode); -Unicode Textbuffer_read(Textbuffer*, Py_ssize_t); +int Textbuffer_write(Textbuffer*, Py_UCS4); +Py_UCS4 Textbuffer_read(Textbuffer*, Py_ssize_t); PyObject* Textbuffer_render(Textbuffer*); int Textbuffer_concat(Textbuffer*, Textbuffer*); void Textbuffer_reverse(Textbuffer*); diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index deac6c5..be7018b 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -52,7 +52,7 @@ static int Tokenizer_parse_tag(Tokenizer*); /* Determine whether the given code point is a marker. 
*/ -static int is_marker(Unicode this) +static int is_marker(Py_UCS4 this) { int i; @@ -442,7 +442,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) static const char* valid = URISCHEME; Textbuffer* buffer; PyObject* scheme; - Unicode this; + Py_UCS4 this; int slashes, i; if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0) @@ -463,7 +463,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) while (1) { if (!valid[i]) goto end_of_loop; - if (this == (Unicode) valid[i]) + if (this == (Py_UCS4) valid[i]) break; i++; } @@ -516,7 +516,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) static const char* valid = URISCHEME; Textbuffer *scheme_buffer = Textbuffer_new(&self->text); PyObject *scheme; - Unicode chunk; + Py_UCS4 chunk; Py_ssize_t i; int slashes, j; uint64_t new_context; @@ -536,7 +536,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) FAIL_ROUTE(0); return 0; } - } while (chunk != (Unicode) valid[j++]); + } while (chunk != (Py_UCS4) valid[j++]); Textbuffer_write(scheme_buffer, chunk); } end_of_loop: @@ -580,7 +580,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Handle text in a free external link, including trailing punctuation. */ static int Tokenizer_handle_free_link_text( - Tokenizer* self, int* parens, Textbuffer* tail, Unicode this) + Tokenizer* self, int* parens, Textbuffer* tail, Py_UCS4 this) { #define PUSH_TAIL_BUFFER(tail, error) \ if (tail && tail->length > 0) { \ @@ -607,10 +607,10 @@ static int Tokenizer_handle_free_link_text( Return whether the current head is the end of a free link. 
*/ static int -Tokenizer_is_free_link(Tokenizer* self, Unicode this, Unicode next) +Tokenizer_is_free_link(Tokenizer* self, Py_UCS4 this, Py_UCS4 next) { // Built from Tokenizer_parse()'s end sentinels: - Unicode after = Tokenizer_read(self, 2); + Py_UCS4 after = Tokenizer_read(self, 2); uint64_t ctx = self->topstack->context; return (!this || this == '\n' || this == '[' || this == ']' || @@ -628,7 +628,7 @@ static PyObject* Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, Textbuffer* extra) { - Unicode this, next; + Py_UCS4 this, next; int parens = 0; if (brackets ? Tokenizer_parse_bracketed_uri_scheme(self) : @@ -816,11 +816,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) if (!heading) { return -1; } -#ifdef IS_PY3K level = PyLong_FromSsize_t(heading->level); -#else - level = PyInt_FromSsize_t(heading->level); -#endif if (!level) { Py_DECREF(heading->title); free(heading); @@ -933,7 +929,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) static int Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *kwargs, *charobj, *textobj; - Unicode this; + Py_UCS4 this; int numeric, hexadecimal, i, j, zeroes, test; char *valid, *text, *buffer, *def; @@ -1014,7 +1010,7 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) while (1) { if (!valid[j]) FAIL_ROUTE_AND_EXIT() - if (this == (Unicode) valid[j]) + if (this == (Py_UCS4) valid[j]) break; j++; } @@ -1111,7 +1107,7 @@ static int Tokenizer_parse_comment(Tokenizer* self) { Py_ssize_t reset = self->head + 3; PyObject *comment; - Unicode this; + Py_UCS4 this; self->head += 4; if (Tokenizer_push(self, 0)) @@ -1211,7 +1207,7 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) Handle whitespace inside of an HTML open tag. 
*/ static int Tokenizer_handle_tag_space( - Tokenizer* self, TagData* data, Unicode text) + Tokenizer* self, TagData* data, Py_UCS4 text) { uint64_t ctx = data->context; uint64_t end_of_value = (ctx & TAG_ATTR_VALUE && @@ -1243,9 +1239,9 @@ static int Tokenizer_handle_tag_space( /* Handle regular text inside of an HTML open tag. */ -static int Tokenizer_handle_tag_text(Tokenizer* self, Unicode text) +static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UCS4 text) { - Unicode next = Tokenizer_read(self, 1); + Py_UCS4 next = Tokenizer_read(self, 1); if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self)) return Tokenizer_emit_char(self, text); @@ -1262,7 +1258,7 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Unicode text) Handle all sorts of text data inside of an HTML open tag. */ static int Tokenizer_handle_tag_data( - Tokenizer* self, TagData* data, Unicode chunk) + Tokenizer* self, TagData* data, Py_UCS4 chunk) { PyObject *trash; int first_time, escaped; @@ -1444,7 +1440,7 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) { Textbuffer* buffer; PyObject *buf_tmp, *end_tag, *start_tag; - Unicode this, next; + Py_UCS4 this, next; Py_ssize_t reset; int cmp; @@ -1600,7 +1596,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) { TagData *data = TagData_new(&self->text); PyObject *token, *text, *trash; - Unicode this, next; + Py_UCS4 this, next; int can_exit; if (!data) @@ -1686,7 +1682,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self) Py_ssize_t reset = self->head + 1, pos = 0; Textbuffer* buf; PyObject *name, *tag; - Unicode this; + Py_UCS4 this; self->head += 2; buf = Textbuffer_new(&self->text); @@ -1988,7 +1984,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self) static int Tokenizer_handle_list_marker(Tokenizer* self) { PyObject *kwargs, *markup; - Unicode code = Tokenizer_read(self, 0); + Py_UCS4 code = Tokenizer_read(self, 0); if (code == ';') self->topstack->context |= LC_DLTERM; @@ -2015,7 
+2011,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self) */ static int Tokenizer_handle_list(Tokenizer* self) { - Unicode marker = Tokenizer_read(self, 1); + Py_UCS4 marker = Tokenizer_read(self, 1); if (Tokenizer_handle_list_marker(self)) return -1; @@ -2169,11 +2165,11 @@ Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, /* Handle style attributes for a table until an ending token. */ -static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token) +static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Py_UCS4 end_token) { TagData *data = TagData_new(&self->text); PyObject *padding, *trash; - Unicode this; + Py_UCS4 this; int can_exit; if (!data) @@ -2483,7 +2479,7 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) everything is safe, or -1 if the route must be failed. */ static int -Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Unicode data) +Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UCS4 data) { if (context & LC_FAIL_NEXT) return -1; @@ -2568,7 +2564,7 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Unicode data) static int Tokenizer_has_leading_whitespace(Tokenizer* self) { int offset = 1; - Unicode current_character; + Py_UCS4 current_character; while (1) { current_character = Tokenizer_read_backwards(self, offset); if (!current_character || current_character == '\n') @@ -2586,7 +2582,7 @@ static int Tokenizer_has_leading_whitespace(Tokenizer* self) PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) { uint64_t this_context; - Unicode this, next, next_next, last; + Py_UCS4 this, next, next_next, last; PyObject* temp; if (push) { diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.h b/mwparserfromhell/parser/ctokenizer/tok_parse.h index 9d98b00..bdae573 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.h +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.h @@ -24,7 +24,7 @@ SOFTWARE. 
#include "common.h" -static const Unicode MARKERS[] = { +static const Py_UCS4 MARKERS[] = { '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', '-', '!', '\n', '\0'}; diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.c b/mwparserfromhell/parser/ctokenizer/tok_support.c index 30dc2a1..bf554f6 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_support.c +++ b/mwparserfromhell/parser/ctokenizer/tok_support.c @@ -275,7 +275,7 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, /* Write a Unicode codepoint to the current textbuffer. */ -int Tokenizer_emit_char(Tokenizer* self, Unicode code) +int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) { return Textbuffer_write(self->topstack->textbuffer, code); } @@ -389,19 +389,15 @@ int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) /* Internal function to read the codepoint at the given index from the input. */ -static Unicode read_codepoint(TokenizerInput* text, Py_ssize_t index) +static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) { -#ifdef PEP_393 return PyUnicode_READ(text->kind, text->data, index); -#else - return text->buf[index]; -#endif } /* Read the value at a relative point in the wikicode, forwards. */ -Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) +Py_UCS4 Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; @@ -413,7 +409,7 @@ Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) /* Read the value at a relative point in the wikicode, backwards. 
*/ -Unicode Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) +Py_UCS4 Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index; diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.h b/mwparserfromhell/parser/ctokenizer/tok_support.h index f65d102..d08f5c4 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_support.h +++ b/mwparserfromhell/parser/ctokenizer/tok_support.h @@ -38,14 +38,14 @@ void Tokenizer_free_bad_route_tree(Tokenizer*); int Tokenizer_emit_token(Tokenizer*, PyObject*, int); int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); -int Tokenizer_emit_char(Tokenizer*, Unicode); +int Tokenizer_emit_char(Tokenizer*, Py_UCS4); int Tokenizer_emit_text(Tokenizer*, const char*); int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*); int Tokenizer_emit_all(Tokenizer*, PyObject*); int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); -Unicode Tokenizer_read(Tokenizer*, Py_ssize_t); -Unicode Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); +Py_UCS4 Tokenizer_read(Tokenizer*, Py_ssize_t); +Py_UCS4 Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); /* Macros */ diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.c b/mwparserfromhell/parser/ctokenizer/tokenizer.c index 24d0b4a..a501032 100644 --- a/mwparserfromhell/parser/ctokenizer/tokenizer.c +++ b/mwparserfromhell/parser/ctokenizer/tokenizer.c @@ -85,12 +85,8 @@ static void init_tokenizer_text(TokenizerInput* text) text->object = Py_None; Py_INCREF(Py_None); text->length = 0; -#ifdef PEP_393 text->kind = PyUnicode_WCHAR_KIND; text->data = NULL; -#else - text->buf = NULL; -#endif } /* @@ -119,14 +115,10 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input) dealloc_tokenizer_text(text); text->object = input; -#ifdef PEP_393 if (PyUnicode_READY(input) < 0) return -1; text->kind = PyUnicode_KIND(input); text->data = PyUnicode_DATA(input); -#else - text->buf = PyUnicode_AS_UNICODE(input); -#endif text->length = 
PyUnicode_GET_LENGTH(input); return 0; } @@ -192,11 +184,9 @@ static int load_entities(void) { PyObject *tempmod, *defmap, *deflist; unsigned numdefs, i; -#ifdef IS_PY3K PyObject *string; -#endif - tempmod = PyImport_ImportModule(ENTITYDEFS_MODULE); + tempmod = PyImport_ImportModule("html.entities"); if (!tempmod) return -1; defmap = PyObject_GetAttrString(tempmod, "entitydefs"); @@ -212,14 +202,10 @@ static int load_entities(void) if (!entitydefs) return -1; for (i = 0; i < numdefs; i++) { -#ifdef IS_PY3K string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); if (!string) return -1; entitydefs[i] = PyBytes_AsString(string); -#else - entitydefs[i] = PyBytes_AsString(PyList_GET_ITEM(deflist, i)); -#endif if (!entitydefs[i]) return -1; } @@ -233,7 +219,7 @@ static int load_tokens(void) *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("tokens"); + *modname = PyUnicode_FromString("tokens"); char *name = "mwparserfromhell.parser"; if (!fromlist || !modname) @@ -256,7 +242,7 @@ static int load_defs(void) *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("definitions"); + *modname = PyUnicode_FromString("definitions"); char *name = "mwparserfromhell"; if (!fromlist || !modname) @@ -277,7 +263,7 @@ static int load_exceptions(void) *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("parser"); + *modname = PyUnicode_FromString("parser"); char *name = "mwparserfromhell"; if (!fromlist || !modname) @@ -294,24 +280,22 @@ static int load_exceptions(void) return 0; } -PyMODINIT_FUNC INIT_FUNC_NAME(void) +PyMODINIT_FUNC PyInit__tokenizer(void) { PyObject *module; TokenizerType.tp_new = PyType_GenericNew; if (PyType_Ready(&TokenizerType) < 0) - INIT_ERROR; - module = CREATE_MODULE; + return NULL; + module = PyModule_Create(&module_def); if (!module) - INIT_ERROR; + 
return NULL; Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); Py_INCREF(Py_True); PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); NOARGS = PyTuple_New(0); if (!NOARGS || load_entities() || load_tokens() || load_defs()) - INIT_ERROR; -#ifdef IS_PY3K + return NULL; return module; -#endif } diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.h b/mwparserfromhell/parser/ctokenizer/tokenizer.h index 6050ce0..ac98d79 100644 --- a/mwparserfromhell/parser/ctokenizer/tokenizer.h +++ b/mwparserfromhell/parser/ctokenizer/tokenizer.h @@ -32,22 +32,6 @@ static void Tokenizer_dealloc(Tokenizer*); static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); -/* Compatibility macros */ - -#ifdef IS_PY3K - #define IMPORT_NAME_FUNC PyUnicode_FromString - #define CREATE_MODULE PyModule_Create(&module_def); - #define ENTITYDEFS_MODULE "html.entities" - #define INIT_FUNC_NAME PyInit__tokenizer - #define INIT_ERROR return NULL -#else - #define IMPORT_NAME_FUNC PyBytes_FromString - #define CREATE_MODULE Py_InitModule("_tokenizer", NULL); - #define ENTITYDEFS_MODULE "htmlentitydefs" - #define INIT_FUNC_NAME init_tokenizer - #define INIT_ERROR return -#endif - /* Structs */ static PyMethodDef Tokenizer_methods[] = { @@ -101,11 +85,9 @@ static PyTypeObject TokenizerType = { Tokenizer_new, /* tp_new */ }; -#ifdef IS_PY3K static PyModuleDef module_def = { PyModuleDef_HEAD_INIT, "_tokenizer", "Creates a list of tokens from a string of wikicode.", -1, NULL, NULL, NULL, NULL, NULL }; -#endif diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index f44360e..a95c477 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,12 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
IN THE # SOFTWARE. -from __future__ import unicode_literals +import html.entities as htmlentities from math import log import re from . import contexts, tokens, ParserError -from ..compat import htmlentities, range from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -35,11 +33,11 @@ class BadRoute(Exception): """Raised internally when the current tokenization route is invalid.""" def __init__(self, context=0): - super(BadRoute, self).__init__() + super().__init__() self.context = context -class _TagOpenData(object): +class _TagOpenData: """Stores data about an HTML open tag, like ````.""" CX_NAME = 1 << 0 CX_ATTR_READY = 1 << 1 @@ -57,7 +55,7 @@ class _TagOpenData(object): self.reset = 0 -class Tokenizer(object): +class Tokenizer: """Creates a list of tokens from a string of wikicode.""" USES_C = False START = object() diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 3110179..ec99c67 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -28,9 +27,6 @@ a syntactically valid form by the :class:`.Tokenizer`, and then converted into the :class`.Wikicode` tree by the :class:`.Builder`. 
""" -from __future__ import unicode_literals - -from ..compat import py3k, str __all__ = ["Token"] @@ -65,7 +61,7 @@ class Token(dict): def make(name): """Create a new Token class using ``type()`` and add it to ``__all__``.""" __all__.append(name) - return type(name if py3k else name.encode("utf8"), (Token,), {}) + return type(name, (Token,), {}) Text = make("Text") diff --git a/mwparserfromhell/smart_list/ListProxy.py b/mwparserfromhell/smart_list/ListProxy.py index 6d4b85c..35b45dc 100644 --- a/mwparserfromhell/smart_list/ListProxy.py +++ b/mwparserfromhell/smart_list/ListProxy.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan @@ -24,7 +23,6 @@ # SmartList has to be a full import in order to avoid cyclical import errors import mwparserfromhell.smart_list.SmartList from .utils import _SliceNormalizerMixIn, inheritdoc -from ..compat import py3k class _ListProxy(_SliceNormalizerMixIn, list): @@ -36,7 +34,7 @@ class _ListProxy(_SliceNormalizerMixIn, list): """ def __init__(self, parent, sliceinfo): - super(_ListProxy, self).__init__() + super().__init__() self._parent = parent self._sliceinfo = sliceinfo @@ -73,12 +71,8 @@ class _ListProxy(_SliceNormalizerMixIn, list): return self._render() >= list(other) return self._render() >= other - if py3k: - def __bool__(self): - return bool(self._render()) - else: - def __nonzero__(self): - return bool(self._render()) + def __bool__(self): + return bool(self._render()) def __len__(self): return max((self._stop - self._start) // self._step, 0) @@ -138,16 +132,6 @@ class _ListProxy(_SliceNormalizerMixIn, list): def __contains__(self, item): return item in self._render() - if not py3k: - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, iterable): - self.__setitem__(slice(start, stop), iterable) - - def __delslice__(self, start, stop): - self.__delitem__(slice(start, stop)) - def 
__add__(self, other): return mwparserfromhell.smart_list.SmartList(list(self) + other) @@ -237,27 +221,13 @@ class _ListProxy(_SliceNormalizerMixIn, list): item.reverse() self._parent[self._start:self._stop:self._step] = item - if py3k: - @inheritdoc - def sort(self, key=None, reverse=None): - item = self._render() - kwargs = {} - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item - else: - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item diff --git a/mwparserfromhell/smart_list/SmartList.py b/mwparserfromhell/smart_list/SmartList.py index 30d2b1e..c2e83a4 100644 --- a/mwparserfromhell/smart_list/SmartList.py +++ b/mwparserfromhell/smart_list/SmartList.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # @@ -25,7 +23,6 @@ from _weakref import ref from .ListProxy import _ListProxy from .utils import _SliceNormalizerMixIn, inheritdoc -from ..compat import py3k class SmartList(_SliceNormalizerMixIn, list): @@ -54,14 +51,14 @@ class SmartList(_SliceNormalizerMixIn, list): def __init__(self, iterable=None): if iterable: - super(SmartList, self).__init__(iterable) + super().__init__(iterable) else: - super(SmartList, self).__init__() + super().__init__() self._children = {} def __getitem__(self, key): if not isinstance(key, slice): - 
return super(SmartList, self).__getitem__(key) + return super().__getitem__(key) key = self._normalize_slice(key, clamp=False) sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) @@ -71,44 +68,32 @@ class SmartList(_SliceNormalizerMixIn, list): def __setitem__(self, key, item): if not isinstance(key, slice): - return super(SmartList, self).__setitem__(key, item) + return super().__setitem__(key, item) item = list(item) - super(SmartList, self).__setitem__(key, item) + super().__setitem__(key, item) key = self._normalize_slice(key, clamp=True) diff = len(item) + (key.start - key.stop) // key.step if not diff: return - values = self._children.values if py3k else self._children.itervalues - for child, (start, stop, step) in values(): + for child, (start, stop, step) in self._children.values(): if start > key.stop: self._children[id(child)][1][0] += diff if stop is not None and stop >= key.stop: self._children[id(child)][1][1] += diff def __delitem__(self, key): - super(SmartList, self).__delitem__(key) + super().__delitem__(key) if isinstance(key, slice): key = self._normalize_slice(key, clamp=True) else: key = slice(key, key + 1, 1) diff = (key.stop - key.start) // key.step - values = self._children.values if py3k else self._children.itervalues - for child, (start, stop, step) in values(): + for child, (start, stop, step) in self._children.values(): if start > key.start: self._children[id(child)][1][0] -= diff if stop is not None and stop >= key.stop: self._children[id(child)][1][1] -= diff - if not py3k: - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, iterable): - self.__setitem__(slice(start, stop), iterable) - - def __delslice__(self, start, stop): - self.__delitem__(slice(start, stop)) - def __add__(self, other): return SmartList(list(self) + other) @@ -159,27 +144,14 @@ class SmartList(_SliceNormalizerMixIn, list): @inheritdoc def reverse(self): 
self._detach_children() - super(SmartList, self).reverse() - - if py3k: - @inheritdoc - def sort(self, key=None, reverse=None): - self._detach_children() - kwargs = {} - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - else: - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - self._detach_children() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) + super().reverse() + + @inheritdoc + def sort(self, key=None, reverse=None): + self._detach_children() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super().sort(**kwargs) diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py index 81d4fb1..fdf7bd8 100644 --- a/mwparserfromhell/smart_list/__init__.py +++ b/mwparserfromhell/smart_list/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan diff --git a/mwparserfromhell/smart_list/utils.py b/mwparserfromhell/smart_list/utils.py index 609b095..1a36d0b 100644 --- a/mwparserfromhell/smart_list/utils.py +++ b/mwparserfromhell/smart_list/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # @@ -21,8 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals - from sys import maxsize __all__ = [] @@ -38,7 +34,7 @@ def inheritdoc(method): return method -class _SliceNormalizerMixIn(object): +class _SliceNormalizerMixIn: """MixIn that provides a private method to normalize slices.""" def _normalize_slice(self, key, clamp=False): diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 3664a09..564706d 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -22,14 +21,11 @@ """ This module contains the :class:`.StringMixIn` type, which implements the -interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. +interface for the ``str`` type in a dynamic manner. """ -from __future__ import unicode_literals from sys import getdefaultencoding -from .compat import bytes, py3k, str - __all__ = ["StringMixIn"] def inheritdoc(method): @@ -41,24 +37,20 @@ def inheritdoc(method): method.__doc__ = getattr(str, method.__name__).__doc__ return method -class StringMixIn(object): +class StringMixIn: """Implement the interface for ``unicode``/``str`` in a dynamic manner. To use this class, inherit from it and override the :meth:`__unicode__` - method (same on py3k) to return the string representation of the object. + method to return the string representation of the object. The various string methods will operate on the value of :meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. 
""" - if py3k: - def __str__(self): - return self.__unicode__() + def __str__(self): + return self.__unicode__() - def __bytes__(self): - return bytes(self.__unicode__(), getdefaultencoding()) - else: - def __str__(self): - return bytes(self.__unicode__()) + def __bytes__(self): + return bytes(self.__unicode__(), getdefaultencoding()) def __unicode__(self): raise NotImplementedError() @@ -84,19 +76,14 @@ class StringMixIn(object): def __ge__(self, other): return self.__unicode__() >= other - if py3k: - def __bool__(self): - return bool(self.__unicode__()) - else: - def __nonzero__(self): - return bool(self.__unicode__()) + def __bool__(self): + return bool(self.__unicode__()) def __len__(self): return len(self.__unicode__()) def __iter__(self): - for char in self.__unicode__(): - yield char + yield from self.__unicode__() def __getitem__(self, key): return self.__unicode__()[key] @@ -113,8 +100,7 @@ class StringMixIn(object): type(self).__name__, attr)) return getattr(self.__unicode__(), attr) - if py3k: - maketrans = str.maketrans # Static method can't rely on __getattr__ + maketrans = str.maketrans # Static method can't rely on __getattr__ del inheritdoc diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index d30a2da..9e5e14b 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -25,9 +24,7 @@ This module contains accessory functions for other parts of the library. Parser users generally won't need stuff from here. 
""" -from __future__ import unicode_literals -from .compat import bytes, str from .nodes import Node from .smart_list import SmartList diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 1a966e2..f72c26b 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,12 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals - import re from itertools import chain -from .compat import bytes, py3k, range, str from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from .smart_list.ListProxy import _ListProxy @@ -49,7 +45,7 @@ class Wikicode(StringMixIn): RECURSE_OTHERS = 2 def __init__(self, nodes): - super(Wikicode, self).__init__() + super().__init__() self._nodes = nodes def __unicode__(self): @@ -64,8 +60,7 @@ class Wikicode(StringMixIn): for code in node.__children__(): for child in code.nodes: sub = Wikicode._get_children(child, contexts, restrict, code) - for result in sub: - yield result + yield from sub @staticmethod def _slice_replace(code, index, old, new): @@ -253,7 +248,7 @@ class Wikicode(StringMixIn): self.ifilter(forcetype=ftype, *a, **kw)) make_filter = lambda ftype: (lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)) - for name, ftype in (meths.items() if py3k else meths.iteritems()): + for name, ftype in meths.items(): ifilter = make_ifilter(ftype) filter = make_filter(ftype) ifilter.__doc__ = doc.format(name, "ifilter", ftype) diff --git a/scripts/memtest.py b/scripts/memtest.py index 64e8c6b..f60e260 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -40,7 +40,6 @@ import sys import psutil -from mwparserfromhell.compat import py3k from mwparserfromhell.parser._tokenizer import CTokenizer if sys.version_info[0] == 2: @@ -88,8 +87,6 @@ class 
MemoryTest(object): def load_file(filename): with open(filename, "rU") as fp: text = fp.read() - if not py3k: - text = text.decode("utf8") name = path.split(filename)[1][:0-len(extension)] self._parse_file(name, text) diff --git a/setup.py b/setup.py index 97abef0..74f7567 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #! /usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2018 Ben Kurtovic # @@ -21,23 +20,20 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import print_function from distutils.errors import DistutilsError, CCompilerError from glob import glob from os import environ import sys -if ((sys.version_info[0] == 2 and sys.version_info[1] < 7) or - (sys.version_info[1] == 3 and sys.version_info[1] < 4)): - raise RuntimeError("mwparserfromhell needs Python 2.7 or 3.4+") +if sys.version_info[1] == 3 and sys.version_info[1] < 4: + raise RuntimeError("mwparserfromhell needs 3.4+") from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext from mwparserfromhell import __version__ -from mwparserfromhell.compat import py3k -with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: +with open("README.rst", encoding='utf-8') as fp: long_docs = fp.read() use_extension = True @@ -98,8 +94,6 @@ setup( "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", diff --git a/tests/__init__.py b/tests/__init__.py index 89907bf..e69de29 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 4d19dd4..f61cb10 100644 --- a/tests/_test_tokenizer.py +++ 
b/tests/_test_tokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,13 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import codecs from os import listdir, path import sys import warnings -from mwparserfromhell.compat import py3k, str from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder @@ -35,7 +32,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(object): +class TokenizerTestCase: """A base test case for tokenizers, whose tests are loaded dynamically. Subclassed along with unittest.TestCase to form TestPyTokenizer and @@ -60,8 +57,6 @@ class TokenizerTestCase(object): actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(expected, actual) - if not py3k: - inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] return inner diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index aba54d1..cdfbd3a 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from unittest import TestCase -from mwparserfromhell.compat import range from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter diff --git a/tests/compat.py b/tests/compat.py deleted file mode 100644 index d5b3fba..0000000 --- a/tests/compat.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Serves the same purpose as mwparserfromhell.compat, but only for objects -required by unit tests. This avoids unnecessary imports (like urllib) within -the main library. 
-""" - -from mwparserfromhell.compat import py3k - -if py3k: - from io import StringIO - from urllib.parse import urlencode - from urllib.request import urlopen - -else: - from StringIO import StringIO - from urllib import urlencode, urlopen diff --git a/tests/test_argument.py b/tests/test_argument.py index 16b4d0c..eaf8abe 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_attribute.py b/tests/test_attribute.py index e9f2528..b0d0e85 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Template from mwparserfromhell.nodes.extras import Attribute diff --git a/tests/test_builder.py b/tests/test_builder.py index 7343077..e5f43aa 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter @@ -428,9 +425,8 @@ class TestBuilder(TreeEqualityTestCase): [tokens.TagOpenOpen()] ] - func = self.assertRaisesRegex if py3k else self.assertRaisesRegexp msg = r"_handle_token\(\) got unexpected TemplateClose" - func(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) + self.assertRaisesRegex(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) for test in missing_closes: self.assertRaises(ParserError, self.builder.build, test) diff --git a/tests/test_comment.py b/tests/test_comment.py index cf2f14d..1024e60 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Comment from ._test_tree_equality import TreeEqualityTestCase diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 3552a02..f9b8d2f 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest try: diff --git a/tests/test_docs.py b/tests/test_docs.py index 8559493..2e78106 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,15 +19,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import print_function, unicode_literals import json +from io import StringIO import os import unittest +from urllib.parse import urlencode +from urllib.request import urlopen import mwparserfromhell -from mwparserfromhell.compat import py3k, str - -from .compat import StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -47,16 +45,10 @@ class TestDocs(unittest.TestCase): self.assertPrint(wikicode, "I has a template! {{foo|bar|baz|eggs=spam}} See it?") templates = wikicode.filter_templates() - if py3k: - self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") - else: - self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") + self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") template = templates[0] self.assertPrint(template.name, "foo") - if py3k: - self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") - else: - self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") + self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") self.assertPrint(template.get(1).value, "bar") self.assertPrint(template.get("eggs").value, "spam") @@ -64,21 +56,14 @@ class TestDocs(unittest.TestCase): """test a block of example code in the README""" text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" temps = mwparserfromhell.parse(text).filter_templates() - if py3k: - res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" - else: - res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" + res = 
"['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" self.assertPrint(temps, res) def test_readme_3(self): """test a block of example code in the README""" code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") - if py3k: - self.assertPrint(code.filter_templates(recursive=False), - "['{{foo|this {{includes a|template}}}}']") - else: - self.assertPrint(code.filter_templates(recursive=False), - "[u'{{foo|this {{includes a|template}}}}']") + self.assertPrint(code.filter_templates(recursive=False), + "['{{foo|this {{includes a|template}}}}']") foo = code.filter_templates(recursive=False)[0] self.assertPrint(foo.get(1).value, "this {{includes a|template}}") self.assertPrint(foo.get(1).value.filter_templates()[0], @@ -98,10 +83,7 @@ class TestDocs(unittest.TestCase): code.replace("{{uncategorized}}", "{{bar-stub}}") res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" self.assertPrint(code, res) - if py3k: - res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" - else: - res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" + res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" self.assertPrint(code.filter_templates(), res) text = str(code) res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. 
{{bar-stub}}" @@ -126,14 +108,14 @@ class TestDocs(unittest.TestCase): } try: raw = urlopen(url1, urlencode(data).encode("utf8")).read() - except IOError: + except OSError: self.skipTest("cannot continue because of unsuccessful web call") res = json.loads(raw.decode("utf8")) revision = res["query"]["pages"][0]["revisions"][0] text = revision["slots"]["main"]["content"] try: expected = urlopen(url2.format(title)).read().decode("utf8") - except IOError: + except OSError: self.skipTest("cannot continue because of unsuccessful web call") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_external_link.py b/tests/test_external_link.py index c70905a..48a7b82 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_heading.py b/tests/test_heading.py index e5ec470..46c6258 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index fc09fde..273ee21 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity from ._test_tree_equality import TreeEqualityTestCase, wrap diff --git a/tests/test_parameter.py b/tests/test_parameter.py index be09448..d53c7af 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter diff --git a/tests/test_parser.py b/tests/test_parser.py index 5b12a0e..22a76f6 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,11 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.compat import range from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index a4c9bc1..9fd0c3e 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest from mwparserfromhell.parser import contexts diff --git a/tests/test_roundtripping.py b/tests/test_roundtripping.py index 50f9c1f..9ecd5bd 100644 --- a/tests/test_roundtripping.py +++ b/tests/test_roundtripping.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest from ._test_tokenizer import TokenizerTestCase diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 8deddd5..16d99e7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,11 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals - import unittest -from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList from mwparserfromhell.smart_list.ListProxy import _ListProxy @@ -129,14 +125,9 @@ class TestSmartList(unittest.TestCase): list3 = builder([0, 2, 3, 4]) list4 = builder([0, 1, 2]) - if py3k: - self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEqual(b"\x00\x01\x02", bytes(list4)) - self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) - else: - self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) - self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) - self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) self.assertLess(list1, list3) self.assertLessEqual(list1, list3) @@ -264,12 +255,6 @@ class TestSmartList(unittest.TestCase): self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) self.assertEqual([5, 4, 3, 2, 2, 0], list1) - if not py3k: - func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 - list1.sort(cmp=func) - self.assertEqual([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=func, reverse=True) - self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 11ee6b7..673d5fa 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,12 +19,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, range, str from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): @@ -42,18 +39,16 @@ class TestStringMixIn(unittest.TestCase): def test_docs(self): """make sure the various methods of StringMixIn have docstrings""" methods = [ - "capitalize", "center", "count", "encode", "endswith", - "expandtabs", "find", "format", "index", "isalnum", "isalpha", - "isdecimal", "isdigit", "islower", "isnumeric", "isspace", - "istitle", "isupper", "join", "ljust", "lower", "lstrip", - "partition", "replace", "rfind", "rindex", "rjust", "rpartition", - "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", - "swapcase", "title", "translate", "upper", "zfill"] - if py3k: - methods.extend(["casefold", "format_map", "isidentifier", "isprintable", - "maketrans"]) - else: - methods.append("decode") + "capitalize", "casefold", "center", "count", "encode", "endswith", + "expandtabs", "find", "format", "format_map", "index", "isalnum", + "isalpha", "isdecimal", "isdigit", "isidentifier", "islower", + "isnumeric", "isprintable", "isspace", "istitle", "isupper", + "join", "ljust", "lower", "lstrip", "maketrans", "partition", + "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", + "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", + "title", "translate", "upper", "zfill" + ] + for meth in methods: expected = getattr("foo", meth).__doc__ actual = getattr(_FakeString("foo"), meth).__doc__ @@ -64,17 +59,11 @@ class TestStringMixIn(unittest.TestCase): fstr = _FakeString("fake string") self.assertEqual(str(fstr), "fake string") self.assertEqual(bytes(fstr), b"fake string") - if py3k: - self.assertEqual(repr(fstr), "'fake string'") - else: - self.assertEqual(repr(fstr), b"u'fake string'") + self.assertEqual(repr(fstr), "'fake string'") 
self.assertIsInstance(str(fstr), str) self.assertIsInstance(bytes(fstr), bytes) - if py3k: - self.assertIsInstance(repr(fstr), str) - else: - self.assertIsInstance(repr(fstr), bytes) + self.assertIsInstance(repr(fstr), str) def test_comparisons(self): """make sure comparison operators work""" @@ -179,14 +168,6 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(1, str1.count("r", 5, 9)) self.assertEqual(0, str1.count("r", 5, 7)) - if not py3k: - str2 = _FakeString("fo") - self.assertEqual(str1, str1.decode()) - actual = _FakeString("\\U00010332\\U0001033f\\U00010344") - self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) - self.assertRaises(UnicodeError, str2.decode, "punycode") - self.assertEqual("", str2.decode("punycode", "ignore")) - str3 = _FakeString("𐌲𐌿𐍄") actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) @@ -233,10 +214,9 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") - if py3k: - self.assertEqual("fake string", str1.format_map({})) - self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) - self.assertRaises(ValueError, str5.format_map, {0: "abc"}) + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertRaises(ValueError, str5.format_map, {0: "abc"}) self.assertEqual(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") @@ -269,11 +249,10 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str13.isdigit()) self.assertTrue(str14.isdigit()) - if py3k: - self.assertTrue(str9.isidentifier()) - self.assertTrue(str10.isidentifier()) - self.assertFalse(str11.isidentifier()) - self.assertFalse(str12.isidentifier()) + self.assertTrue(str9.isidentifier()) + self.assertTrue(str10.isidentifier()) + self.assertFalse(str11.isidentifier()) +
self.assertFalse(str12.isidentifier()) str15 = _FakeString("") str16 = _FakeString("FooBar") @@ -286,13 +265,12 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str13.isnumeric()) self.assertTrue(str14.isnumeric()) - if py3k: - str16B = _FakeString("\x01\x02") - self.assertTrue(str9.isprintable()) - self.assertTrue(str13.isprintable()) - self.assertTrue(str14.isprintable()) - self.assertTrue(str15.isprintable()) - self.assertFalse(str16B.isprintable()) + str16B = _FakeString("\x01\x02") + self.assertTrue(str9.isprintable()) + self.assertTrue(str13.isprintable()) + self.assertTrue(str14.isprintable()) + self.assertTrue(str15.isprintable()) + self.assertFalse(str16B.isprintable()) str17 = _FakeString(" ") str18 = _FakeString("\t \t \r\n") @@ -323,10 +301,9 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str15.lower()) self.assertEqual("foobar", str16.lower()) self.assertEqual("ß", str22.lower()) - if py3k: - self.assertEqual("", str15.casefold()) - self.assertEqual("foobar", str16.casefold()) - self.assertEqual("ss", str22.casefold()) + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) str23 = _FakeString(" fake string ") self.assertEqual("fake string", str1.lstrip()) @@ -372,9 +349,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) - if py3k: - actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEqual(actual, str25.rsplit(maxsplit=3)) + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) self.assertEqual(" fake string", str23.rstrip()) @@ -390,9 +366,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with
whitespace "] self.assertEqual(actual, str25.split(" ", 3)) - if py3k: - actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEqual(actual, str25.split(maxsplit=3)) + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") self.assertEqual(["lines", "of", "text", "are", "presented", "here"], @@ -411,17 +386,13 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("Fake String", str1.title()) - if py3k: - table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", - 111: "4", 117: "5"}) - table2 = StringMixIn.maketrans("aeiou", "12345") - table3 = StringMixIn.maketrans("aeiou", "12345", "rts") - self.assertEqual("f1k2 str3ng", str1.translate(table1)) - self.assertEqual("f1k2 str3ng", str1.translate(table2)) - self.assertEqual("f1k2 3ng", str1.translate(table3)) - else: - table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} - self.assertEqual("f1k2 str3ng", str1.translate(table)) + table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", + 111: "4", 117: "5"}) + table2 = StringMixIn.maketrans("aeiou", "12345") + table3 = StringMixIn.maketrans("aeiou", "12345", "rts") + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) self.assertEqual("", str15.upper()) self.assertEqual("FOOBAR", str16.upper()) diff --git a/tests/test_tag.py b/tests/test_tag.py index c8c9808..860a94b 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text from mwparserfromhell.nodes.extras import Attribute from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_template.py b/tests/test_template.py index e03a564..461371d 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2017 Ben Kurtovic # @@ -20,11 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from difflib import unified_diff import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell import parse diff --git a/tests/test_text.py b/tests/test_text.py index 4464418..94da937 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text class TestText(unittest.TestCase): diff --git a/tests/test_tokens.py b/tests/test_tokens.py index e766002..6ce28b5 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens class TestTokens(unittest.TestCase): @@ -64,14 +61,9 @@ class TestTokens(unittest.TestCase): hundredchars = ("earwig" * 100)[:97] + "..." self.assertEqual("Token()", repr(token1)) - if py3k: - token2repr1 = "Token(foo='bar', baz=123)" - token2repr2 = "Token(baz=123, foo='bar')" - token3repr = "Text(text='" + hundredchars + "')" - else: - token2repr1 = "Token(foo=u'bar', baz=123)" - token2repr2 = "Token(baz=123, foo=u'bar')" - token3repr = "Text(text=u'" + hundredchars + "')" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" + token3repr = "Text(text='" + hundredchars + "')" token2repr = repr(token2) self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) diff --git a/tests/test_utils.py b/tests/test_utils.py index b79b544..b8572fd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest from mwparserfromhell.nodes import Template, Text diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 307ee9a..9701865 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,13 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals from functools import partial import re from types import GeneratorType import unittest -from mwparserfromhell.compat import py3k, str from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 487b7af..1865b6e 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext From 91ed26d86403dcbc51f831879ba5ceaf5df9ac80 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Mon, 6 Jan 2020 21:20:59 -0800 Subject: [PATCH 2/2] Set python_requires in setup.py to ">= 3.4" In addition to replacing the manual version check, this will also instruct pip to download an older version of mwparserfromhell for users running earlier Python versions rather than just getting something broken. 
--- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 74f7567..f339665 100644 --- a/setup.py +++ b/setup.py @@ -25,9 +25,6 @@ from glob import glob from os import environ import sys -if sys.version_info[1] == 3 and sys.version_info[1] < 4: - raise RuntimeError("mwparserfromhell needs 3.4+") - from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext @@ -80,6 +77,7 @@ setup( ext_modules = [tokenizer] if use_extension else [], test_suite = "tests", version = __version__, + python_requires = ">= 3.4", author = "Ben Kurtovic", author_email = "ben.kurtovic@gmail.com", url = "https://github.com/earwig/mwparserfromhell",