From 996570993479fd57a503a7901c6c143c71a92a0e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 15 May 2019 00:17:33 -0400 Subject: [PATCH 01/31] Version bump; update release script; update Travis --- .travis.yml | 2 ++ CHANGELOG | 4 ++++ appveyor.yml | 2 +- docs/changelog.rst | 8 ++++++++ mwparserfromhell/__init__.py | 2 +- scripts/release.sh | 6 +++--- 6 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5fc2718..4665c33 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,11 @@ +dist: xenial language: python python: - 2.7 - 3.4 - 3.5 - 3.6 + - 3.7 - nightly sudo: false install: diff --git a/CHANGELOG b/CHANGELOG index 7c85c2b..ece051e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +v0.6 (unreleased): + +- ... + v0.5.4 (released May 15, 2019): - Fixed an unlikely crash in the C tokenizer when interrupted while parsing diff --git a/appveyor.yml b/appveyor.yml index 3d738e4..076fc21 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ # This config file is used by appveyor.com to build Windows release binaries -version: 0.5.4-b{build} +version: 0.6.dev0-b{build} branches: only: diff --git a/docs/changelog.rst b/docs/changelog.rst index 808e9b9..948c9c1 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,14 @@ Changelog ========= +v0.6 +---- + +Unreleased +(`changes `__): + +- ... + v0.5.4 ------ diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 4e4d440..e8aa944 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki `_ wikicode. __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012-2019 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.5.4" +__version__ = "0.6.dev0" __email__ = "ben.kurtovic@gmail.com" from . 
import (compat, definitions, nodes, parser, smart_list, string_mixin, diff --git a/scripts/release.sh b/scripts/release.sh index aa497e7..71595c5 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -84,7 +84,7 @@ post_release() { echo echo "*** Release completed." echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" - echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" + echo "*** Verify: https://pypi.org/project/mwparserfromhell" echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell" echo "*** Verify: https://mwparserfromhell.readthedocs.io" echo "*** Press enter to sanity-check the release." @@ -96,7 +96,7 @@ test_release() { echo "Checking mwparserfromhell v$VERSION..." echo -n "Creating a virtualenv..." virtdir="mwparser-test-env" - virtualenv -q $virtdir + python -m venv $virtdir cd $virtdir source bin/activate echo " done." @@ -104,7 +104,7 @@ test_release() { pip -q install mwparserfromhell echo " done." echo -n "Checking version..." - reported_version=$(python -c 'print __import__("mwparserfromhell").__version__') + reported_version=$(python -c 'print(__import__("mwparserfromhell").__version__)') if [[ "$reported_version" != "$VERSION" ]]; then echo " error." echo "*** ERROR: mwparserfromhell is reporting its version as $reported_version, not $VERSION!" From 6136b1b205f329782cfd7b9054557b840391ac62 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 30 Jun 2019 00:57:06 -0400 Subject: [PATCH 02/31] Make Wikicode.matches() treat _ and space as equivalent (fixes #216) --- CHANGELOG | 3 ++- docs/changelog.rst | 3 ++- mwparserfromhell/wikicode.py | 12 ++++++------ tests/test_wikicode.py | 16 ++++++++++++---- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index ece051e..d95b07c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ v0.6 (unreleased): -- ... +- Updated Wikicode.matches() to recognize underscores as being equivalent + to spaces. 
(#216) v0.5.4 (released May 15, 2019): diff --git a/docs/changelog.rst b/docs/changelog.rst index 948c9c1..c46e8f1 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,7 +7,8 @@ v0.6 Unreleased (`changes `__): -- ... +- Updated Wikicode.matches() to recognize underscores as being equivalent + to spaces. (`#216 `_) v0.5.4 ------ diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 0cc7276..840d8ed 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2017 Ben Kurtovic +# Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -501,16 +501,16 @@ class Wikicode(StringMixIn): letter's case is normalized. Typical usage is ``if template.name.matches("stub"): ...``. """ - cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] - if a and b else a == b) - this = self.strip_code().strip() + normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s + this = normalize(self.strip_code().strip()) + if isinstance(other, (str, bytes, Wikicode, Node)): that = parse_anything(other).strip_code().strip() - return cmp(this, that) + return this == normalize(that) for obj in other: that = parse_anything(obj).strip_code().strip() - if cmp(this, that): + if this == normalize(that): return True return False diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index fceb272..307ee9a 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -312,7 +312,9 @@ class TestWikicode(TreeEqualityTestCase): """test Wikicode.matches()""" code1 = parse("Cleanup") code2 = parse("\nstub") - code3 = parse("") + code3 = parse("Hello world!") + code4 = parse("World,_hello?") + code5 = parse("") self.assertTrue(code1.matches("Cleanup")) self.assertTrue(code1.matches("cleanup")) 
self.assertTrue(code1.matches(" cleanup\n")) @@ -327,9 +329,15 @@ class TestWikicode(TreeEqualityTestCase): self.assertFalse(code2.matches(["StuB", "sTUb", "foobar"])) self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub"))) self.assertTrue(code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"])) - self.assertTrue(code3.matches("")) - self.assertTrue(code3.matches("")) - self.assertTrue(code3.matches(("a", "b", ""))) + self.assertTrue(code3.matches("hello world!")) + self.assertTrue(code3.matches("hello_world!")) + self.assertFalse(code3.matches("hello__world!")) + self.assertTrue(code4.matches("World,_hello?")) + self.assertTrue(code4.matches("World, hello?")) + self.assertFalse(code4.matches("World, hello?")) + self.assertTrue(code5.matches("")) + self.assertTrue(code5.matches("")) + self.assertTrue(code5.matches(("a", "b", ""))) def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" From b6e4c590040414592371e96516e9c03c4c22aa5c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 9 Sep 2019 00:01:14 -0400 Subject: [PATCH 03/31] Switch to requests for basic API example (closes #219); update links --- LICENSE | 2 +- README.rst | 46 ++++++++++++++++++++++++++------------------ docs/conf.py | 2 +- docs/index.rst | 12 ++++++------ docs/integration.rst | 34 ++++++++++++++++++++------------ mwparserfromhell/__init__.py | 2 +- tests/test_docs.py | 13 ++++++++++--- 7 files changed, 68 insertions(+), 43 deletions(-) diff --git a/LICENSE b/LICENSE index f353cd7..c846a0e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2012-2018 Ben Kurtovic +Copyright (C) 2012-2019 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index 65474cf..9428997 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ mwparserfromhell .. 
image:: https://img.shields.io/travis/earwig/mwparserfromhell/develop.svg :alt: Build Status - :target: http://travis-ci.org/earwig/mwparserfromhell + :target: https://travis-ci.org/earwig/mwparserfromhell .. image:: https://img.shields.io/coveralls/earwig/mwparserfromhell/develop.svg :alt: Coverage Status @@ -177,36 +177,44 @@ If you're using Pywikibot_, your code might look like this: text = page.get() return mwparserfromhell.parse(text) -If you're not using a library, you can parse any page using the following -Python 3 code (via the API_): +If you're not using a library, you can parse any page with the following +Python 3 code (using the API_ and the requests_ library): .. code-block:: python - import json - from urllib.parse import urlencode - from urllib.request import urlopen + import requests import mwparserfromhell + API_URL = "https://en.wikipedia.org/w/api.php" def parse(title): - data = {"action": "query", "prop": "revisions", "rvprop": "content", - "rvslots": "main", "rvlimit": 1, "titles": title, - "format": "json", "formatversion": "2"} - raw = urlopen(API_URL, urlencode(data).encode()).read() - res = json.loads(raw) + params = { + "action": "query", + "prop": "revisions", + "rvprop": "content", + "rvslots": "main", + "rvlimit": 1, + "titles": title, + "format": "json", + "formatversion": "2", + } + headers = {"User-Agent": "My-Bot-Name/1.0"} + req = requests.get(API_URL, headers=headers, params=params) + res = req.json() revision = res["query"]["pages"][0]["revisions"][0] text = revision["slots"]["main"]["content"] return mwparserfromhell.parse(text) -.. _MediaWiki: http://mediawiki.org -.. _ReadTheDocs: http://mwparserfromhell.readthedocs.io -.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig -.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 -.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm +.. _MediaWiki: https://www.mediawiki.org +.. _ReadTheDocs: https://mwparserfromhell.readthedocs.io +.. 
_Earwig: https://en.wikipedia.org/wiki/User:The_Earwig +.. _Σ: https://en.wikipedia.org/wiki/User:%CE%A3 +.. _Legoktm: https://en.wikipedia.org/wiki/User:Legoktm .. _GitHub: https://github.com/earwig/mwparserfromhell -.. _Python Package Index: http://pypi.python.org -.. _get pip: http://pypi.python.org/pypi/pip +.. _Python Package Index: https://pypi.org/ +.. _get pip: https://pypi.org/project/pip/ .. _Word-ending links: https://www.mediawiki.org/wiki/Help:Links#linktrail .. _EarwigBot: https://github.com/earwig/earwigbot .. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot -.. _API: http://mediawiki.org/wiki/API +.. _API: https://www.mediawiki.org/wiki/API:Main_page +.. _requests: https://2.python-requests.org diff --git a/docs/conf.py b/docs/conf.py index 5ac9c70..9666cd0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = u'mwparserfromhell' -copyright = u'2012–2018 Ben Kurtovic' +copyright = u'2012–2019 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/index.rst b/docs/index.rst index 06dc2f9..e3bfac0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,10 +8,10 @@ wikicode. It supports Python 2 and Python 3. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Development occurs on GitHub_. -.. _MediaWiki: http://mediawiki.org -.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig -.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 -.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm +.. _MediaWiki: https://www.mediawiki.org +.. _Earwig: https://en.wikipedia.org/wiki/User:The_Earwig +.. _Σ: https://en.wikipedia.org/wiki/User:%CE%A3 +.. _Legoktm: https://en.wikipedia.org/wiki/User:Legoktm .. 
_GitHub: https://github.com/earwig/mwparserfromhell Installation @@ -30,8 +30,8 @@ Alternatively, get the latest development version:: You can run the comprehensive unit testing suite with ``python setup.py test -q``. -.. _Python Package Index: http://pypi.python.org -.. _get pip: http://pypi.python.org/pypi/pip +.. _Python Package Index: https://pypi.org/ +.. _get pip: https://pypi.org/project/pip/ Contents -------- diff --git a/docs/integration.rst b/docs/integration.rst index c1c1f39..b1c2df4 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -7,7 +7,7 @@ Integration :func:`mwparserfromhell.parse() ` on :meth:`~earwigbot.wiki.page.Page.get`. -If you're using Pywikibot_, your code might look like this:: +If you're using Pywikibot_, your code might look like this: import mwparserfromhell import pywikibot @@ -18,23 +18,33 @@ If you're using Pywikibot_, your code might look like this:: text = page.get() return mwparserfromhell.parse(text) -If you're not using a library, you can parse any page using the following code -(via the API_):: +If you're not using a library, you can parse any page with the following +Python 3 code (using the API_ and the requests_ library): - import json - from urllib.parse import urlencode - from urllib.request import urlopen + import requests import mwparserfromhell + API_URL = "https://en.wikipedia.org/w/api.php" def parse(title): - data = {"action": "query", "prop": "revisions", "rvlimit": 1, - "rvprop": "content", "format": "json", "titles": title} - raw = urlopen(API_URL, urlencode(data).encode()).read() - res = json.loads(raw) - text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + params = { + "action": "query", + "prop": "revisions", + "rvprop": "content", + "rvslots": "main", + "rvlimit": 1, + "titles": title, + "format": "json", + "formatversion": "2", + } + headers = {"User-Agent": "My-Bot-Name/1.0"} + req = requests.get(API_URL, headers=headers, params=params) + res = req.json() + revision = 
res["query"]["pages"][0]["revisions"][0] + text = revision["slots"]["main"]["content"] return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot .. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot -.. _API: http://mediawiki.org/wiki/API +.. _API: https://www.mediawiki.org/wiki/API:Main_page +.. _requests: https://2.python-requests.org diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index e8aa944..f867e26 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -23,7 +23,7 @@ """ `mwparserfromhell `_ (the MediaWiki Parser from Hell) is a Python package that provides an easy-to-use and -outrageously powerful parser for `MediaWiki `_ wikicode. +outrageously powerful parser for `MediaWiki `_ wikicode. """ __author__ = "Ben Kurtovic" diff --git a/tests/test_docs.py b/tests/test_docs.py index ef596d6..8559493 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -114,9 +114,16 @@ class TestDocs(unittest.TestCase): url1 = "https://en.wikipedia.org/w/api.php" url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw" title = "Test" - data = {"action": "query", "prop": "revisions", "rvprop": "content", - "rvslots": "main", "rvlimit": 1, "titles": title, - "format": "json", "formatversion": "2"} + data = { + "action": "query", + "prop": "revisions", + "rvprop": "content", + "rvslots": "main", + "rvlimit": 1, + "titles": title, + "format": "json", + "formatversion": "2", + } try: raw = urlopen(url1, urlencode(data).encode("utf8")).read() except IOError: From 83b58655ea72980c9633f393ff49f1e63d348c7c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 29 Sep 2019 23:07:49 -0400 Subject: [PATCH 04/31] Experiment with DeepSource --- .deepsource.toml | 13 +++++++++++++ README.rst | 2 +- docs/integration.rst | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 
0000000..9af5e82 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,13 @@ +version = 1 + +test_patterns = [ + '/tests/*' +] + +exclude_patterns = [ +] + +[[analyzers]] +name = 'python' +enabled = true +runtime_version = '3.x.x' diff --git a/README.rst b/README.rst index 9428997..343e762 100644 --- a/README.rst +++ b/README.rst @@ -182,8 +182,8 @@ Python 3 code (using the API_ and the requests_ library): .. code-block:: python - import requests import mwparserfromhell + import requests API_URL = "https://en.wikipedia.org/w/api.php" diff --git a/docs/integration.rst b/docs/integration.rst index b1c2df4..8054d9f 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -21,8 +21,8 @@ If you're using Pywikibot_, your code might look like this: If you're not using a library, you can parse any page with the following Python 3 code (using the API_ and the requests_ library): - import requests import mwparserfromhell + import requests API_URL = "https://en.wikipedia.org/w/api.php" From ee99c7bd413a6f33c8635e689bf1cf337eb65e86 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 4 Dec 2019 16:57:00 -0500 Subject: [PATCH 05/31] Experiment concluded --- .deepsource.toml | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml deleted file mode 100644 index 9af5e82..0000000 --- a/.deepsource.toml +++ /dev/null @@ -1,13 +0,0 @@ -version = 1 - -test_patterns = [ - '/tests/*' -] - -exclude_patterns = [ -] - -[[analyzers]] -name = 'python' -enabled = true -runtime_version = '3.x.x' From 2a4e1f431642a037ed1c5985137ac33d178ff7f1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Dec 2019 00:40:05 -0500 Subject: [PATCH 06/31] Add contexts.describe() for debugging --- mwparserfromhell/parser/contexts.py | 15 ++++++++++++++- tests/test_pytokenizer.py | 8 +++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py 
index af6dea6..cac5250 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2017 Ben Kurtovic +# Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -193,3 +193,16 @@ UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK + +def describe(context): + """Return a string describing the given context value, for debugging.""" + flags = [] + for name, value in globals().items(): + if not isinstance(value, int) or name.startswith("GL_"): + continue + if bin(value).count("1") != 1: + continue # Hacky way to skip aggregate contexts + if context & value: + flags.append((name, value)) + flags.sort(key=lambda it: it[1]) + return "|".join(it[0] for it in flags) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 85a55b9..a4c9bc1 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,6 +23,7 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser import contexts from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase @@ -40,5 +41,10 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): self.assertFalse(Tokenizer.USES_C) self.assertFalse(Tokenizer().USES_C) + def test_describe_context(self): + self.assertEqual("", 
contexts.describe(0)) + ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY|contexts.HAS_TEXT) + self.assertEqual("TEMPLATE_PARAM_KEY|HAS_TEXT", ctx) + if __name__ == "__main__": unittest.main(verbosity=2) From b3c98efd22bd7e49e68480bbf492bc62314f981e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Dec 2019 01:06:13 -0500 Subject: [PATCH 07/31] Fix a parsing bug involving deeply nested style tags (fixes #224) --- CHANGELOG | 1 + docs/changelog.rst | 2 ++ mwparserfromhell/parser/ctokenizer/tok_parse.c | 5 +++++ mwparserfromhell/parser/tokenizer.py | 6 +++++- tests/tokenizer/integration.mwtest | 7 +++++++ 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index d95b07c..dee81fb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,7 @@ v0.6 (unreleased): - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) +- Fixed a rare parsing bug involving deeply nested style tags. (#224) v0.5.4 (released May 15, 2019): diff --git a/docs/changelog.rst b/docs/changelog.rst index c46e8f1..216c46e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Unreleased - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (`#216 `_) +- Fixed a rare parsing bug involving deeply nested style tags. 
+ (`#224 `_) v0.5.4 ------ diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index c32e48c..deac6c5 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -1807,6 +1807,11 @@ static int Tokenizer_parse_italics(Tokenizer* self) if (BAD_ROUTE_CONTEXT & LC_STYLE_PASS_AGAIN) { context = LC_STYLE_ITALICS | LC_STYLE_SECOND_PASS; stack = Tokenizer_parse(self, context, 1); + if (BAD_ROUTE) { + RESET_ROUTE(); + self->head = reset; + return Tokenizer_emit_text(self, "''"); + } } else return Tokenizer_emit_text(self, "''"); diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 7b2f3ce..f44360e 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -931,7 +931,11 @@ class Tokenizer(object): self._head = reset if route.context & contexts.STYLE_PASS_AGAIN: new_ctx = contexts.STYLE_ITALICS | contexts.STYLE_SECOND_PASS - stack = self._parse(new_ctx) + try: + stack = self._parse(new_ctx) + except BadRoute: + self._head = reset + return self._emit_text("''") else: return self._emit_text("''") self._emit_style_tag("i", "''", stack) diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 7137c50..7ab51c6 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -353,3 +353,10 @@ name: many_invalid_nested_tags label: many unending nested tags that should be treated as plain text, followed by valid wikitext (see issues #42, #183) input: "[[{{x}}" output: [Text(text="[["), TemplateOpen(), Text(text="x"), TemplateClose()] + +--- + +name: nested_templates_and_style_tags +label: many nested templates and style tags, testing edge case behavior and error recovery near the recursion depth limit (see issue #224) +input: 
"{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}\n{|\n|-\n|'''\n|}" +output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text="b"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="c"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="d"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="e"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="f"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="g"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="h"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="i"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="j"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="k"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="m"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), 
TagCloseOpen(), Text(text="o''}}"), TemplateOpen(), Text(text="p"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="q"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="r"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text="t"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="u"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), Text(text="{{v|1="), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="w''x"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="y"), TagOpenClose(), Text(text="b"), TagCloseClose(), TemplateClose(), Text(text="\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="'''\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] From b09b619709dcb9914ce0f7df32811da7ea2e2761 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Dec 2019 01:25:16 -0500 Subject: [PATCH 08/31] Switch to 'unittest discover' over 'setup.py test' --- README.rst | 2 +- appveyor.yml | 10 ++++++---- docs/index.rst | 2 +- scripts/release.sh | 3 ++- tests/_test_tokenizer.py | 11 ++++++----- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index 343e762..a94f4e2 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,7 @@ Alternatively, get the latest development version:: python setup.py install You can run the 
comprehensive unit testing suite with -``python setup.py test -q``. +``python -m unittest discover``. Usage ----- diff --git a/appveyor.yml b/appveyor.yml index 076fc21..854f970 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,9 +13,10 @@ environment: global: # See: http://stackoverflow.com/a/13751649/163740 WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd" - PIP: "%WRAPPER% %PYTHON%\\python.exe -m pip" - SETUPPY: "%WRAPPER% %PYTHON%\\python.exe setup.py --with-extension" - TWINE: "%WRAPPER% %PYTHON%\\python.exe -m twine" + PYEXE: "%WRAPPER% %PYTHON%\\python.exe" + SETUPPY: "%PYEXE% setup.py --with-extension" + PIP: "%PYEXE% -m pip" + TWINE: "%PYEXE% -m twine" PYPI_USERNAME: "earwigbot" PYPI_PASSWORD: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ @@ -67,9 +68,10 @@ install: build_script: - "%SETUPPY% build" + - "%SETUPPY% install --user" test_script: - - "%SETUPPY% -q test" + - "%PYEXE% -m unittest discover" after_test: - "%SETUPPY% bdist_wheel" diff --git a/docs/index.rst b/docs/index.rst index e3bfac0..8a1621f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,7 +28,7 @@ Alternatively, get the latest development version:: python setup.py install You can run the comprehensive unit testing suite with -``python setup.py test -q``. +``python -m unittest discover``. .. _Python Package Index: https://pypi.org/ .. _get pip: https://pypi.org/project/pip/ diff --git a/scripts/release.sh b/scripts/release.sh index 71595c5..f7143c8 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -133,7 +133,8 @@ test_release() { rm mwparserfromhell.tar.gz mwparserfromhell.tar.gz.asc cd mwparserfromhell-$VERSION echo "Running unit tests..." - python setup.py -q test + python setup.py -q install + python -m unittest discover if [[ "$?" != "0" ]]; then echo "*** ERROR: Unit tests failed!" 
deactivate diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index c314df8..4d19dd4 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -20,10 +20,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import print_function, unicode_literals +from __future__ import unicode_literals import codecs from os import listdir, path import sys +import warnings from mwparserfromhell.compat import py3k, str from mwparserfromhell.parser import tokens @@ -98,19 +99,19 @@ class TokenizerTestCase(object): except _TestParseError as err: if data["name"]: error = "Could not parse test '{0}' in '{1}':\n\t{2}" - print(error.format(data["name"], filename, err)) + warnings.warn(error.format(data["name"], filename, err)) else: error = "Could not parse a test in '{0}':\n\t{1}" - print(error.format(filename, err)) + warnings.warn(error.format(filename, err)) continue if not data["name"]: error = "A test in '{0}' was ignored because it lacked a name" - print(error.format(filename)) + warnings.warn(error.format(filename)) continue if data["input"] is None or data["output"] is None: error = "Test '{}' in '{}' was ignored because it lacked an input or an output" - print(error.format(data["name"], filename)) + warnings.warn(error.format(data["name"], filename)) continue number = str(counter).zfill(digits) From 56e776a5780f124804e3b1153d19821171993078 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Dec 2019 01:31:22 -0500 Subject: [PATCH 09/31] Update .travis.yml test running --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4665c33..8dddf1f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,9 +10,9 @@ python: sudo: false install: - pip install coveralls - - python setup.py build + - python setup.py develop script: - - coverage run --source=mwparserfromhell setup.py -q test + - coverage run --source=mwparserfromhell -m unittest 
discover after_success: - coveralls env: From 1d5db417518286180538d5ce533aac5d3bc7c365 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 6 Dec 2019 01:47:24 -0500 Subject: [PATCH 10/31] Add Python 3.8 to CI --- .travis.yml | 3 +-- CHANGELOG | 1 + appveyor.yml | 10 +++++++++- docs/changelog.rst | 1 + setup.py | 1 + 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8dddf1f..0ecf3fe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,8 +6,7 @@ python: - 3.5 - 3.6 - 3.7 - - nightly -sudo: false + - 3.8 install: - pip install coveralls - python setup.py develop diff --git a/CHANGELOG b/CHANGELOG index dee81fb..53b3548 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ v0.6 (unreleased): +- Added support for Python 3.8. - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) - Fixed a rare parsing bug involving deeply nested style tags. (#224) diff --git a/appveyor.yml b/appveyor.yml index 854f970..20f9e35 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -62,13 +62,21 @@ environment: PYTHON_VERSION: "3.7" PYTHON_ARCH: "64" + - PYTHON: "C:\\Python38" + PYTHON_VERSION: "3.8" + PYTHON_ARCH: "32" + + - PYTHON: "C:\\Python38-x64" + PYTHON_VERSION: "3.8" + PYTHON_ARCH: "64" + install: - "%PIP% install --disable-pip-version-check --user --upgrade pip" - "%PIP% install wheel twine" build_script: - "%SETUPPY% build" - - "%SETUPPY% install --user" + - "%SETUPPY% develop --user" test_script: - "%PYEXE% -m unittest discover" diff --git a/docs/changelog.rst b/docs/changelog.rst index 216c46e..1ca7411 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,7 @@ v0.6 Unreleased (`changes `__): +- Added support for Python 3.8. - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (`#216 `_) - Fixed a rare parsing bug involving deeply nested style tags. 
diff --git a/setup.py b/setup.py index 8f84eb5..97abef0 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,7 @@ setup( "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Topic :: Text Processing :: Markup" ], ) From e66a2db0ed2710a7ec9a6ae9708183befa4da5de Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Sun, 29 Dec 2019 22:39:06 -0500 Subject: [PATCH 11/31] add missing public items to __all__ Added a few more public classes, and sorted the __all__ value. The Attribute and Parameter classes are not included as they can be imported from the `mwparserfromhell.nodes.extras` without linter complaining. They could be added to this __all__ too. --- mwparserfromhell/nodes/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 9418199..1e38254 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -34,8 +34,8 @@ from __future__ import unicode_literals from ..compat import str from ..string_mixin import StringMixIn -__all__ = ["Node", "Text", "Argument", "Heading", "HTMLEntity", "Tag", - "Template"] +__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", + "Node", "Tag", "Template", "Text", "Wikilink"] class Node(StringMixIn): """Represents the base Node type, demonstrating the methods to override. 
From ce4254fd192faa168be7a3c764f6c8fafaa96850 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Tue, 31 Dec 2019 11:26:23 -0500 Subject: [PATCH 12/31] ignore InteliJ IDEA files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3da2db3..cf67fa0 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ dist docs/_build scripts/*.log htmlcov/ +.idea/ From aa37425a9bf9eb335d0172b7d7897a32cfd69158 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Wed, 1 Jan 2020 23:15:49 -0500 Subject: [PATCH 13/31] move smart_list into sub-package/multiple files Step one of refactoring - making SmartList into its own package, with each class having its own file. No code changes were made. Note that SmartList and ListProxy import each other, so had to import SmartList as a full package name rather than use from ... import ... construct. --- mwparserfromhell/smart_list.py | 456 ------------------------------- mwparserfromhell/smart_list/ListProxy.py | 240 ++++++++++++++++ mwparserfromhell/smart_list/SmartList.py | 162 +++++++++++ mwparserfromhell/smart_list/__init__.py | 1 + mwparserfromhell/smart_list/utils.py | 59 ++++ mwparserfromhell/wikicode.py | 5 +- tests/test_smart_list.py | 5 +- 7 files changed, 469 insertions(+), 459 deletions(-) delete mode 100644 mwparserfromhell/smart_list.py create mode 100644 mwparserfromhell/smart_list/ListProxy.py create mode 100644 mwparserfromhell/smart_list/SmartList.py create mode 100644 mwparserfromhell/smart_list/__init__.py create mode 100644 mwparserfromhell/smart_list/utils.py diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py deleted file mode 100644 index e7fa59f..0000000 --- a/mwparserfromhell/smart_list.py +++ /dev/null @@ -1,456 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012-2016 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal 
-# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -""" -This module contains the :class:`.SmartList` type, as well as its -:class:`._ListProxy` child, which together implement a list whose sublists -reflect changes made to the main list, and vice-versa. -""" - -from __future__ import unicode_literals -from sys import maxsize -from weakref import ref - -from .compat import py3k - -__all__ = ["SmartList"] - -def inheritdoc(method): - """Set __doc__ of *method* to __doc__ of *method* in its parent class. - - Since this is used on :class:`.SmartList`, the "parent class" used is - ``list``. This function can be used as a decorator. 
- """ - method.__doc__ = getattr(list, method.__name__).__doc__ - return method - - -class _SliceNormalizerMixIn(object): - """MixIn that provides a private method to normalize slices.""" - - def _normalize_slice(self, key, clamp=False): - """Return a slice equivalent to the input *key*, standardized.""" - if key.start is None: - start = 0 - else: - start = (len(self) + key.start) if key.start < 0 else key.start - if key.stop is None or key.stop == maxsize: - stop = len(self) if clamp else None - else: - stop = (len(self) + key.stop) if key.stop < 0 else key.stop - return slice(start, stop, key.step or 1) - - -class SmartList(_SliceNormalizerMixIn, list): - """Implements the ``list`` interface with special handling of sublists. - - When a sublist is created (by ``list[i:j]``), any changes made to this - list (such as the addition, removal, or replacement of elements) will be - reflected in the sublist, or vice-versa, to the greatest degree possible. - This is implemented by having sublists - instances of the - :class:`._ListProxy` type - dynamically determine their elements by storing - their slice info and retrieving that slice from the parent. Methods that - change the size of the list also change the slice info. 
For example:: - - >>> parent = SmartList([0, 1, 2, 3]) - >>> parent - [0, 1, 2, 3] - >>> child = parent[2:] - >>> child - [2, 3] - >>> child.append(4) - >>> child - [2, 3, 4] - >>> parent - [0, 1, 2, 3, 4] - """ - - def __init__(self, iterable=None): - if iterable: - super(SmartList, self).__init__(iterable) - else: - super(SmartList, self).__init__() - self._children = {} - - def __getitem__(self, key): - if not isinstance(key, slice): - return super(SmartList, self).__getitem__(key) - key = self._normalize_slice(key, clamp=False) - sliceinfo = [key.start, key.stop, key.step] - child = _ListProxy(self, sliceinfo) - child_ref = ref(child, self._delete_child) - self._children[id(child_ref)] = (child_ref, sliceinfo) - return child - - def __setitem__(self, key, item): - if not isinstance(key, slice): - return super(SmartList, self).__setitem__(key, item) - item = list(item) - super(SmartList, self).__setitem__(key, item) - key = self._normalize_slice(key, clamp=True) - diff = len(item) + (key.start - key.stop) // key.step - if not diff: - return - values = self._children.values if py3k else self._children.itervalues - for child, (start, stop, step) in values(): - if start > key.stop: - self._children[id(child)][1][0] += diff - if stop is not None and stop >= key.stop: - self._children[id(child)][1][1] += diff - - def __delitem__(self, key): - super(SmartList, self).__delitem__(key) - if isinstance(key, slice): - key = self._normalize_slice(key, clamp=True) - else: - key = slice(key, key + 1, 1) - diff = (key.stop - key.start) // key.step - values = self._children.values if py3k else self._children.itervalues - for child, (start, stop, step) in values(): - if start > key.start: - self._children[id(child)][1][0] -= diff - if stop is not None and stop >= key.stop: - self._children[id(child)][1][1] -= diff - - if not py3k: - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, iterable): - 
self.__setitem__(slice(start, stop), iterable) - - def __delslice__(self, start, stop): - self.__delitem__(slice(start, stop)) - - def __add__(self, other): - return SmartList(list(self) + other) - - def __radd__(self, other): - return SmartList(other + list(self)) - - def __iadd__(self, other): - self.extend(other) - return self - - def _delete_child(self, child_ref): - """Remove a child reference that is about to be garbage-collected.""" - del self._children[id(child_ref)] - - def _detach_children(self): - """Remove all children and give them independent parent copies.""" - children = [val[0] for val in self._children.values()] - for child in children: - child()._parent = list(self) - self._children.clear() - - @inheritdoc - def append(self, item): - head = len(self) - self[head:head] = [item] - - @inheritdoc - def extend(self, item): - head = len(self) - self[head:head] = item - - @inheritdoc - def insert(self, index, item): - self[index:index] = [item] - - @inheritdoc - def pop(self, index=None): - if index is None: - index = len(self) - 1 - item = self[index] - del self[index] - return item - - @inheritdoc - def remove(self, item): - del self[self.index(item)] - - @inheritdoc - def reverse(self): - self._detach_children() - super(SmartList, self).reverse() - - if py3k: - @inheritdoc - def sort(self, key=None, reverse=None): - self._detach_children() - kwargs = {} - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - else: - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - self._detach_children() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - - -class _ListProxy(_SliceNormalizerMixIn, list): - """Implement the ``list`` interface by getting elements from a parent. 
- - This is created by a :class:`.SmartList` object when slicing. It does not - actually store the list at any time; instead, whenever the list is needed, - it builds it dynamically using the :meth:`_render` method. - """ - - def __init__(self, parent, sliceinfo): - super(_ListProxy, self).__init__() - self._parent = parent - self._sliceinfo = sliceinfo - - def __repr__(self): - return repr(self._render()) - - def __lt__(self, other): - if isinstance(other, _ListProxy): - return self._render() < list(other) - return self._render() < other - - def __le__(self, other): - if isinstance(other, _ListProxy): - return self._render() <= list(other) - return self._render() <= other - - def __eq__(self, other): - if isinstance(other, _ListProxy): - return self._render() == list(other) - return self._render() == other - - def __ne__(self, other): - if isinstance(other, _ListProxy): - return self._render() != list(other) - return self._render() != other - - def __gt__(self, other): - if isinstance(other, _ListProxy): - return self._render() > list(other) - return self._render() > other - - def __ge__(self, other): - if isinstance(other, _ListProxy): - return self._render() >= list(other) - return self._render() >= other - - if py3k: - def __bool__(self): - return bool(self._render()) - else: - def __nonzero__(self): - return bool(self._render()) - - def __len__(self): - return max((self._stop - self._start) // self._step, 0) - - def __getitem__(self, key): - if isinstance(key, slice): - key = self._normalize_slice(key, clamp=True) - keystart = min(self._start + key.start, self._stop) - keystop = min(self._start + key.stop, self._stop) - adjusted = slice(keystart, keystop, key.step) - return self._parent[adjusted] - else: - return self._render()[key] - - def __setitem__(self, key, item): - if isinstance(key, slice): - key = self._normalize_slice(key, clamp=True) - keystart = min(self._start + key.start, self._stop) - keystop = min(self._start + key.stop, self._stop) - adjusted 
= slice(keystart, keystop, key.step) - self._parent[adjusted] = item - else: - length = len(self) - if key < 0: - key = length + key - if key < 0 or key >= length: - raise IndexError("list assignment index out of range") - self._parent[self._start + key] = item - - def __delitem__(self, key): - if isinstance(key, slice): - key = self._normalize_slice(key, clamp=True) - keystart = min(self._start + key.start, self._stop) - keystop = min(self._start + key.stop, self._stop) - adjusted = slice(keystart, keystop, key.step) - del self._parent[adjusted] - else: - length = len(self) - if key < 0: - key = length + key - if key < 0 or key >= length: - raise IndexError("list assignment index out of range") - del self._parent[self._start + key] - - def __iter__(self): - i = self._start - while i < self._stop: - yield self._parent[i] - i += self._step - - def __reversed__(self): - i = self._stop - 1 - while i >= self._start: - yield self._parent[i] - i -= self._step - - def __contains__(self, item): - return item in self._render() - - if not py3k: - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, iterable): - self.__setitem__(slice(start, stop), iterable) - - def __delslice__(self, start, stop): - self.__delitem__(slice(start, stop)) - - def __add__(self, other): - return SmartList(list(self) + other) - - def __radd__(self, other): - return SmartList(other + list(self)) - - def __iadd__(self, other): - self.extend(other) - return self - - def __mul__(self, other): - return SmartList(list(self) * other) - - def __rmul__(self, other): - return SmartList(other * list(self)) - - def __imul__(self, other): - self.extend(list(self) * (other - 1)) - return self - - @property - def _start(self): - """The starting index of this list, inclusive.""" - return self._sliceinfo[0] - - @property - def _stop(self): - """The ending index of this list, exclusive.""" - if self._sliceinfo[1] is None: - return 
len(self._parent) - return self._sliceinfo[1] - - @property - def _step(self): - """The number to increase the index by between items.""" - return self._sliceinfo[2] - - def _render(self): - """Return the actual list from the stored start/stop/step.""" - return list(self._parent)[self._start:self._stop:self._step] - - @inheritdoc - def append(self, item): - self._parent.insert(self._stop, item) - - @inheritdoc - def count(self, item): - return self._render().count(item) - - @inheritdoc - def index(self, item, start=None, stop=None): - if start is not None: - if stop is not None: - return self._render().index(item, start, stop) - return self._render().index(item, start) - return self._render().index(item) - - @inheritdoc - def extend(self, item): - self._parent[self._stop:self._stop] = item - - @inheritdoc - def insert(self, index, item): - if index < 0: - index = len(self) + index - self._parent.insert(self._start + index, item) - - @inheritdoc - def pop(self, index=None): - length = len(self) - if index is None: - index = length - 1 - elif index < 0: - index = length + index - if index < 0 or index >= length: - raise IndexError("pop index out of range") - return self._parent.pop(self._start + index) - - @inheritdoc - def remove(self, item): - index = self.index(item) - del self._parent[self._start + index] - - @inheritdoc - def reverse(self): - item = self._render() - item.reverse() - self._parent[self._start:self._stop:self._step] = item - - if py3k: - @inheritdoc - def sort(self, key=None, reverse=None): - item = self._render() - kwargs = {} - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item - else: - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = 
reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item - - -del inheritdoc diff --git a/mwparserfromhell/smart_list/ListProxy.py b/mwparserfromhell/smart_list/ListProxy.py new file mode 100644 index 0000000..2e3b94a --- /dev/null +++ b/mwparserfromhell/smart_list/ListProxy.py @@ -0,0 +1,240 @@ +# SmartList has to be a full import in order to avoid cyclical import errors +import mwparserfromhell.smart_list.SmartList +from .utils import _SliceNormalizerMixIn, inheritdoc +from ..compat import py3k + + +class _ListProxy(_SliceNormalizerMixIn, list): + """Implement the ``list`` interface by getting elements from a parent. + + This is created by a :class:`.SmartList` object when slicing. It does not + actually store the list at any time; instead, whenever the list is needed, + it builds it dynamically using the :meth:`_render` method. + """ + + def __init__(self, parent, sliceinfo): + super(_ListProxy, self).__init__() + self._parent = parent + self._sliceinfo = sliceinfo + + def __repr__(self): + return repr(self._render()) + + def __lt__(self, other): + if isinstance(other, _ListProxy): + return self._render() < list(other) + return self._render() < other + + def __le__(self, other): + if isinstance(other, _ListProxy): + return self._render() <= list(other) + return self._render() <= other + + def __eq__(self, other): + if isinstance(other, _ListProxy): + return self._render() == list(other) + return self._render() == other + + def __ne__(self, other): + if isinstance(other, _ListProxy): + return self._render() != list(other) + return self._render() != other + + def __gt__(self, other): + if isinstance(other, _ListProxy): + return self._render() > list(other) + return self._render() > other + + def __ge__(self, other): + if isinstance(other, _ListProxy): + return self._render() >= list(other) + return self._render() >= other + + if py3k: + def __bool__(self): + return bool(self._render()) + else: + def __nonzero__(self): + return 
bool(self._render()) + + def __len__(self): + return max((self._stop - self._start) // self._step, 0) + + def __getitem__(self, key): + if isinstance(key, slice): + key = self._normalize_slice(key, clamp=True) + keystart = min(self._start + key.start, self._stop) + keystop = min(self._start + key.stop, self._stop) + adjusted = slice(keystart, keystop, key.step) + return self._parent[adjusted] + else: + return self._render()[key] + + def __setitem__(self, key, item): + if isinstance(key, slice): + key = self._normalize_slice(key, clamp=True) + keystart = min(self._start + key.start, self._stop) + keystop = min(self._start + key.stop, self._stop) + adjusted = slice(keystart, keystop, key.step) + self._parent[adjusted] = item + else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") + self._parent[self._start + key] = item + + def __delitem__(self, key): + if isinstance(key, slice): + key = self._normalize_slice(key, clamp=True) + keystart = min(self._start + key.start, self._stop) + keystop = min(self._start + key.stop, self._stop) + adjusted = slice(keystart, keystop, key.step) + del self._parent[adjusted] + else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") + del self._parent[self._start + key] + + def __iter__(self): + i = self._start + while i < self._stop: + yield self._parent[i] + i += self._step + + def __reversed__(self): + i = self._stop - 1 + while i >= self._start: + yield self._parent[i] + i -= self._step + + def __contains__(self, item): + return item in self._render() + + if not py3k: + def __getslice__(self, start, stop): + return self.__getitem__(slice(start, stop)) + + def __setslice__(self, start, stop, iterable): + self.__setitem__(slice(start, stop), iterable) + + def __delslice__(self, start, stop): + self.__delitem__(slice(start, stop)) + + def 
__add__(self, other): + return mwparserfromhell.smart_list.SmartList(list(self) + other) + + def __radd__(self, other): + return mwparserfromhell.smart_list.SmartList(other + list(self)) + + def __iadd__(self, other): + self.extend(other) + return self + + def __mul__(self, other): + return mwparserfromhell.smart_list.SmartList(list(self) * other) + + def __rmul__(self, other): + return mwparserfromhell.smart_list.SmartList(other * list(self)) + + def __imul__(self, other): + self.extend(list(self) * (other - 1)) + return self + + @property + def _start(self): + """The starting index of this list, inclusive.""" + return self._sliceinfo[0] + + @property + def _stop(self): + """The ending index of this list, exclusive.""" + if self._sliceinfo[1] is None: + return len(self._parent) + return self._sliceinfo[1] + + @property + def _step(self): + """The number to increase the index by between items.""" + return self._sliceinfo[2] + + def _render(self): + """Return the actual list from the stored start/stop/step.""" + return list(self._parent)[self._start:self._stop:self._step] + + @inheritdoc + def append(self, item): + self._parent.insert(self._stop, item) + + @inheritdoc + def count(self, item): + return self._render().count(item) + + @inheritdoc + def index(self, item, start=None, stop=None): + if start is not None: + if stop is not None: + return self._render().index(item, start, stop) + return self._render().index(item, start) + return self._render().index(item) + + @inheritdoc + def extend(self, item): + self._parent[self._stop:self._stop] = item + + @inheritdoc + def insert(self, index, item): + if index < 0: + index = len(self) + index + self._parent.insert(self._start + index, item) + + @inheritdoc + def pop(self, index=None): + length = len(self) + if index is None: + index = length - 1 + elif index < 0: + index = length + index + if index < 0 or index >= length: + raise IndexError("pop index out of range") + return self._parent.pop(self._start + index) + + 
@inheritdoc + def remove(self, item): + index = self.index(item) + del self._parent[self._start + index] + + @inheritdoc + def reverse(self): + item = self._render() + item.reverse() + self._parent[self._start:self._stop:self._step] = item + + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + item = self._render() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item diff --git a/mwparserfromhell/smart_list/SmartList.py b/mwparserfromhell/smart_list/SmartList.py new file mode 100644 index 0000000..e442252 --- /dev/null +++ b/mwparserfromhell/smart_list/SmartList.py @@ -0,0 +1,162 @@ +from _weakref import ref + +from .ListProxy import _ListProxy +from .utils import _SliceNormalizerMixIn, inheritdoc +from ..compat import py3k + + +class SmartList(_SliceNormalizerMixIn, list): + """Implements the ``list`` interface with special handling of sublists. + + When a sublist is created (by ``list[i:j]``), any changes made to this + list (such as the addition, removal, or replacement of elements) will be + reflected in the sublist, or vice-versa, to the greatest degree possible. + This is implemented by having sublists - instances of the + :class:`._ListProxy` type - dynamically determine their elements by storing + their slice info and retrieving that slice from the parent. Methods that + change the size of the list also change the slice info. 
For example:: + + >>> parent = SmartList([0, 1, 2, 3]) + >>> parent + [0, 1, 2, 3] + >>> child = parent[2:] + >>> child + [2, 3] + >>> child.append(4) + >>> child + [2, 3, 4] + >>> parent + [0, 1, 2, 3, 4] + """ + + def __init__(self, iterable=None): + if iterable: + super(SmartList, self).__init__(iterable) + else: + super(SmartList, self).__init__() + self._children = {} + + def __getitem__(self, key): + if not isinstance(key, slice): + return super(SmartList, self).__getitem__(key) + key = self._normalize_slice(key, clamp=False) + sliceinfo = [key.start, key.stop, key.step] + child = _ListProxy(self, sliceinfo) + child_ref = ref(child, self._delete_child) + self._children[id(child_ref)] = (child_ref, sliceinfo) + return child + + def __setitem__(self, key, item): + if not isinstance(key, slice): + return super(SmartList, self).__setitem__(key, item) + item = list(item) + super(SmartList, self).__setitem__(key, item) + key = self._normalize_slice(key, clamp=True) + diff = len(item) + (key.start - key.stop) // key.step + if not diff: + return + values = self._children.values if py3k else self._children.itervalues + for child, (start, stop, step) in values(): + if start > key.stop: + self._children[id(child)][1][0] += diff + if stop is not None and stop >= key.stop: + self._children[id(child)][1][1] += diff + + def __delitem__(self, key): + super(SmartList, self).__delitem__(key) + if isinstance(key, slice): + key = self._normalize_slice(key, clamp=True) + else: + key = slice(key, key + 1, 1) + diff = (key.stop - key.start) // key.step + values = self._children.values if py3k else self._children.itervalues + for child, (start, stop, step) in values(): + if start > key.start: + self._children[id(child)][1][0] -= diff + if stop is not None and stop >= key.stop: + self._children[id(child)][1][1] -= diff + + if not py3k: + def __getslice__(self, start, stop): + return self.__getitem__(slice(start, stop)) + + def __setslice__(self, start, stop, iterable): + 
self.__setitem__(slice(start, stop), iterable) + + def __delslice__(self, start, stop): + self.__delitem__(slice(start, stop)) + + def __add__(self, other): + return SmartList(list(self) + other) + + def __radd__(self, other): + return SmartList(other + list(self)) + + def __iadd__(self, other): + self.extend(other) + return self + + def _delete_child(self, child_ref): + """Remove a child reference that is about to be garbage-collected.""" + del self._children[id(child_ref)] + + def _detach_children(self): + """Remove all children and give them independent parent copies.""" + children = [val[0] for val in self._children.values()] + for child in children: + child()._parent = list(self) + self._children.clear() + + @inheritdoc + def append(self, item): + head = len(self) + self[head:head] = [item] + + @inheritdoc + def extend(self, item): + head = len(self) + self[head:head] = item + + @inheritdoc + def insert(self, index, item): + self[index:index] = [item] + + @inheritdoc + def pop(self, index=None): + if index is None: + index = len(self) - 1 + item = self[index] + del self[index] + return item + + @inheritdoc + def remove(self, item): + del self[self.index(item)] + + @inheritdoc + def reverse(self): + self._detach_children() + super(SmartList, self).reverse() + + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + self._detach_children() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + self._detach_children() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py new file mode 100644 index 0000000..302c9f8 --- /dev/null +++ 
b/mwparserfromhell/smart_list/__init__.py @@ -0,0 +1 @@ +from .SmartList import SmartList diff --git a/mwparserfromhell/smart_list/utils.py b/mwparserfromhell/smart_list/utils.py new file mode 100644 index 0000000..48ab0d8 --- /dev/null +++ b/mwparserfromhell/smart_list/utils.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2016 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +This module contains the :class:`.SmartList` type, as well as its +:class:`._ListProxy` child, which together implement a list whose sublists +reflect changes made to the main list, and vice-versa. +""" + +from __future__ import unicode_literals + +from sys import maxsize + +__all__ = [] + + +def inheritdoc(method): + """Set __doc__ of *method* to __doc__ of *method* in its parent class. + + Since this is used on :class:`.SmartList`, the "parent class" used is + ``list``. This function can be used as a decorator. 
+ """ + method.__doc__ = getattr(list, method.__name__).__doc__ + return method + + +class _SliceNormalizerMixIn(object): + """MixIn that provides a private method to normalize slices.""" + + def _normalize_slice(self, key, clamp=False): + """Return a slice equivalent to the input *key*, standardized.""" + if key.start is None: + start = 0 + else: + start = (len(self) + key.start) if key.start < 0 else key.start + if key.stop is None or key.stop == maxsize: + stop = len(self) if clamp else None + else: + stop = (len(self) + key.stop) if key.stop < 0 else key.stop + return slice(start, stop, key.step or 1) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 840d8ed..1a966e2 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -21,13 +21,14 @@ # SOFTWARE. from __future__ import unicode_literals -from itertools import chain + import re +from itertools import chain from .compat import bytes, py3k, range, str from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) -from .smart_list import _ListProxy +from .smart_list.ListProxy import _ListProxy from .string_mixin import StringMixIn from .utils import parse_anything diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 3c9f711..8deddd5 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -21,10 +21,13 @@ # SOFTWARE. 
from __future__ import unicode_literals + import unittest from mwparserfromhell.compat import py3k, range -from mwparserfromhell.smart_list import SmartList, _ListProxy +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.smart_list.ListProxy import _ListProxy + class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" From 406562415032328f5f54f738cda8f976e67b3ebc Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Wed, 1 Jan 2020 23:23:30 -0500 Subject: [PATCH 14/31] updated comment/license --- mwparserfromhell/smart_list/ListProxy.py | 23 +++++++++++++++++++++++ mwparserfromhell/smart_list/SmartList.py | 23 +++++++++++++++++++++++ mwparserfromhell/smart_list/__init__.py | 29 +++++++++++++++++++++++++++++ mwparserfromhell/smart_list/utils.py | 7 +------ 4 files changed, 76 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/smart_list/ListProxy.py b/mwparserfromhell/smart_list/ListProxy.py index 2e3b94a..6d4b85c 100644 --- a/mwparserfromhell/smart_list/ListProxy.py +++ b/mwparserfromhell/smart_list/ListProxy.py @@ -1,3 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2019-2020 Yuri Astrakhan +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + # SmartList has to be a full import in order to avoid cyclical import errors import mwparserfromhell.smart_list.SmartList from .utils import _SliceNormalizerMixIn, inheritdoc diff --git a/mwparserfromhell/smart_list/SmartList.py b/mwparserfromhell/smart_list/SmartList.py index e442252..30d2b1e 100644 --- a/mwparserfromhell/smart_list/SmartList.py +++ b/mwparserfromhell/smart_list/SmartList.py @@ -1,3 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2019-2020 Yuri Astrakhan +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + from _weakref import ref from .ListProxy import _ListProxy diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py index 302c9f8..81d4fb1 100644 --- a/mwparserfromhell/smart_list/__init__.py +++ b/mwparserfromhell/smart_list/__init__.py @@ -1 +1,30 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2019-2020 Yuri Astrakhan +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +This module contains the :class:`.SmartList` type, as well as its +:class:`._ListProxy` child, which together implement a list whose sublists +reflect changes made to the main list, and vice-versa. 
+""" + from .SmartList import SmartList diff --git a/mwparserfromhell/smart_list/utils.py b/mwparserfromhell/smart_list/utils.py index 48ab0d8..609b095 100644 --- a/mwparserfromhell/smart_list/utils.py +++ b/mwparserfromhell/smart_list/utils.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,12 +21,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -""" -This module contains the :class:`.SmartList` type, as well as its -:class:`._ListProxy` child, which together implement a list whose sublists -reflect changes made to the main list, and vice-versa. -""" - from __future__ import unicode_literals from sys import maxsize From 7e5297fbe6fde6b9661fd6703cd8b6e711292a58 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Sun, 5 Jan 2020 21:56:06 -0800 Subject: [PATCH 15/31] Drop Python 2 support Fixes #221. 
--- .travis.yml | 1 - README.rst | 6 +- appveyor.yml | 8 -- docs/index.rst | 2 +- docs/usage.rst | 4 +- mwparserfromhell/__init__.py | 3 +- mwparserfromhell/compat.py | 27 ------ mwparserfromhell/definitions.py | 2 - mwparserfromhell/nodes/__init__.py | 3 - mwparserfromhell/nodes/argument.py | 5 +- mwparserfromhell/nodes/comment.py | 5 +- mwparserfromhell/nodes/external_link.py | 5 +- mwparserfromhell/nodes/extras/__init__.py | 1 - mwparserfromhell/nodes/extras/attribute.py | 5 +- mwparserfromhell/nodes/extras/parameter.py | 5 +- mwparserfromhell/nodes/heading.py | 5 +- mwparserfromhell/nodes/html_entity.py | 39 ++------- mwparserfromhell/nodes/tag.py | 5 +- mwparserfromhell/nodes/template.py | 5 +- mwparserfromhell/nodes/text.py | 5 +- mwparserfromhell/nodes/wikilink.py | 5 +- mwparserfromhell/parser/__init__.py | 5 +- mwparserfromhell/parser/builder.py | 5 +- mwparserfromhell/parser/contexts.py | 1 - mwparserfromhell/parser/ctokenizer/common.h | 26 +----- mwparserfromhell/parser/ctokenizer/tag_data.h | 2 +- mwparserfromhell/parser/ctokenizer/textbuffer.c | 55 ++---------- mwparserfromhell/parser/ctokenizer/textbuffer.h | 4 +- mwparserfromhell/parser/ctokenizer/tok_parse.c | 56 ++++++------ mwparserfromhell/parser/ctokenizer/tok_parse.h | 2 +- mwparserfromhell/parser/ctokenizer/tok_support.c | 12 +-- mwparserfromhell/parser/ctokenizer/tok_support.h | 6 +- mwparserfromhell/parser/ctokenizer/tokenizer.c | 34 ++----- mwparserfromhell/parser/ctokenizer/tokenizer.h | 18 ---- mwparserfromhell/parser/tokenizer.py | 10 +-- mwparserfromhell/parser/tokens.py | 6 +- mwparserfromhell/smart_list/ListProxy.py | 56 +++--------- mwparserfromhell/smart_list/SmartList.py | 66 ++++---------- mwparserfromhell/smart_list/__init__.py | 1 - mwparserfromhell/smart_list/utils.py | 6 +- mwparserfromhell/string_mixin.py | 36 +++----- mwparserfromhell/utils.py | 3 - mwparserfromhell/wikicode.py | 11 +-- scripts/memtest.py | 3 - setup.py | 12 +-- tests/__init__.py | 1 - 
tests/_test_tokenizer.py | 7 +- tests/_test_tree_equality.py | 3 - tests/compat.py | 18 ---- tests/test_argument.py | 3 - tests/test_attribute.py | 3 - tests/test_builder.py | 6 +- tests/test_comment.py | 3 - tests/test_ctokenizer.py | 2 - tests/test_docs.py | 40 +++------ tests/test_external_link.py | 3 - tests/test_heading.py | 3 - tests/test_html_entity.py | 3 - tests/test_parameter.py | 3 - tests/test_parser.py | 3 - tests/test_pytokenizer.py | 2 - tests/test_roundtripping.py | 2 - tests/test_smart_list.py | 21 +---- tests/test_string_mixin.py | 107 +++++++++-------------- tests/test_tag.py | 3 - tests/test_template.py | 3 - tests/test_text.py | 3 - tests/test_tokens.py | 14 +-- tests/test_utils.py | 2 - tests/test_wikicode.py | 3 - tests/test_wikilink.py | 3 - 71 files changed, 189 insertions(+), 657 deletions(-) delete mode 100644 mwparserfromhell/compat.py delete mode 100644 tests/compat.py diff --git a/.travis.yml b/.travis.yml index 0ecf3fe..bee8152 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ dist: xenial language: python python: - - 2.7 - 3.4 - 3.5 - 3.6 diff --git a/README.rst b/README.rst index a94f4e2..98af7a4 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ mwparserfromhell **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 2 and Python 3. +wikicode. It supports Python 3.4+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. @@ -41,7 +41,7 @@ Normal usage is rather straightforward (where ``text`` is page text): >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an -ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods. +ordinary ``str`` object with some extra methods. For example: >>> text = "I has a template! 
{{foo|bar|baz|eggs=spam}} See it?" @@ -111,8 +111,6 @@ saving the page!) by calling ``str()`` on it: >>> text == code True -Likewise, use ``unicode(code)`` in Python 2. - Limitations ----------- diff --git a/appveyor.yml b/appveyor.yml index 20f9e35..2a4de47 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,14 +22,6 @@ environment: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ matrix: - - PYTHON: "C:\\Python27" - PYTHON_VERSION: "2.7" - PYTHON_ARCH: "32" - - - PYTHON: "C:\\Python27-x64" - PYTHON_VERSION: "2.7" - PYTHON_ARCH: "64" - - PYTHON: "C:\\Python34" PYTHON_VERSION: "3.4" PYTHON_ARCH: "32" diff --git a/docs/index.rst b/docs/index.rst index 8a1621f..1ca69f6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ MWParserFromHell v\ |version| Documentation :mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 2 and Python 3. +wikicode. It supports Python 3.4+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Development occurs on GitHub_. diff --git a/docs/usage.rst b/docs/usage.rst index ee667fd..2cdc690 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -7,8 +7,7 @@ Normal usage is rather straightforward (where ``text`` is page text):: >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which -acts like an ordinary ``str`` object (or ``unicode`` in Python 2) with some -extra methods. For example:: +acts like an ordinary ``str`` object with some extra methods. For example:: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) @@ -78,7 +77,6 @@ saving the page!) by calling :func:`str` on it:: >>> text == code True -(Likewise, use :func:`unicode(code) ` in Python 2.) 
For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and the :mod:`list of Nodes <.nodes>`. diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index f867e26..6056b83 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -32,7 +31,7 @@ __license__ = "MIT License" __version__ = "0.6.dev0" __email__ = "ben.kurtovic@gmail.com" -from . import (compat, definitions, nodes, parser, smart_list, string_mixin, +from . import (definitions, nodes, parser, smart_list, string_mixin, utils, wikicode) parse = utils.parse_anything diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py deleted file mode 100644 index 85f9d48..0000000 --- a/mwparserfromhell/compat.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :class:`str` is set to -:class:`unicode` on Python 2 but :class:`str` on Python 3; likewise, -:class:`bytes` is :class:`str` on 2 but :class:`bytes` on 3. These types are -meant to be imported directly from within the parser's modules. 
-""" - -import sys - -py3k = (sys.version_info[0] == 3) - -if py3k: - bytes = bytes - str = str - range = range - import html.entities as htmlentities - -else: - bytes = str - str = unicode - range = xrange - import htmlentitydefs as htmlentities - -del sys diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 4399970..6191dc6 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -28,7 +27,6 @@ When updating this file, please also update the the C tokenizer version: - mwparserfromhell/parser/ctokenizer/definitions.h """ -from __future__ import unicode_literals __all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", "is_single_only", "is_scheme"] diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 1e38254..6aa6ea4 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -29,9 +28,7 @@ the name of a :class:`.Template` is a :class:`.Wikicode` object that can contain text or more templates. """ -from __future__ import unicode_literals -from ..compat import str from ..string_mixin import StringMixIn __all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 2da1467..4d9d613 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . 
import Node -from ..compat import str from ..utils import parse_anything __all__ = ["Argument"] @@ -32,7 +29,7 @@ class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" def __init__(self, name, default=None): - super(Argument, self).__init__() + super().__init__() self.name = name self.default = default diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 40224ba..302699e 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node -from ..compat import str __all__ = ["Comment"] @@ -31,7 +28,7 @@ class Comment(Node): """Represents a hidden HTML comment, like ````.""" def __init__(self, contents): - super(Comment, self).__init__() + super().__init__() self.contents = contents def __unicode__(self): diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 22b2ef7..4dc3594 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . 
import Node -from ..compat import str from ..utils import parse_anything __all__ = ["ExternalLink"] @@ -32,7 +29,7 @@ class ExternalLink(Node): """Represents an external link, like ``[http://example.com/ Example]``.""" def __init__(self, url, title=None, brackets=True): - super(ExternalLink, self).__init__() + super().__init__() self.url = url self.title = title self.brackets = brackets diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 9347c61..43fe862 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 4312199..38d2423 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,9 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals -from ...compat import str from ...string_mixin import StringMixIn from ...utils import parse_anything @@ -38,7 +35,7 @@ class Attribute(StringMixIn): def __init__(self, name, value=None, quotes='"', pad_first=" ", pad_before_eq="", pad_after_eq=""): - super(Attribute, self).__init__() + super().__init__() self.name = name self._quotes = None self.value = value diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index fb0aac0..4478084 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import re -from ...compat import str from ...string_mixin import StringMixIn from ...utils import parse_anything @@ -39,7 +36,7 @@ class Parameter(StringMixIn): """ def __init__(self, name, value, showkey=True): - super(Parameter, self).__init__() + super().__init__() self.name = name self.value = value self.showkey = showkey diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 426e742..1fe8790 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node -from ..compat import str from ..utils import parse_anything __all__ = ["Heading"] @@ -32,7 +29,7 @@ class Heading(Node): """Represents a section heading in wikicode, like ``== Foo ==``.""" def __init__(self, title, level): - super(Heading, self).__init__() + super().__init__() self.title = title self.level = level diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index ea534e9..8a2eef4 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals +import html.entities as htmlentities from . 
import Node -from ..compat import htmlentities, py3k, str __all__ = ["HTMLEntity"] @@ -31,7 +29,7 @@ class HTMLEntity(Node): """Represents an HTML entity, like `` ``, either named or unnamed.""" def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): - super(HTMLEntity, self).__init__() + super().__init__() self._value = value if named is None: # Try to guess whether or not the entity is named try: @@ -63,32 +61,6 @@ class HTMLEntity(Node): return self.normalize() return self - if not py3k: - @staticmethod - def _unichr(value): - """Implement builtin unichr() with support for non-BMP code points. - - On wide Python builds, this functions like the normal unichr(). On - narrow builds, this returns the value's encoded surrogate pair. - """ - try: - return unichr(value) - except ValueError: - # Test whether we're on the wide or narrow Python build. Check - # the length of a non-BMP code point - # (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 1: # pragma: no cover - raise - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - @property def value(self): """The string value of the HTML entity.""" @@ -173,9 +145,8 @@ class HTMLEntity(Node): def normalize(self): """Return the unicode character represented by the HTML entity.""" - chrfunc = chr if py3k else HTMLEntity._unichr if self.named: - return chrfunc(htmlentities.name2codepoint[self.value]) + return chr(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return chrfunc(int(self.value, 16)) - return chrfunc(int(self.value)) + return chr(int(self.value, 16)) + return chr(int(self.value)) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 70a2876..9fa45c5 100644 --- a/mwparserfromhell/nodes/tag.py 
+++ b/mwparserfromhell/nodes/tag.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,11 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node from .extras import Attribute -from ..compat import str from ..definitions import is_visible from ..utils import parse_anything @@ -37,7 +34,7 @@ class Tag(Node): self_closing=False, invalid=False, implicit=False, padding="", closing_tag=None, wiki_style_separator=None, closing_wiki_markup=None): - super(Tag, self).__init__() + super().__init__() self.tag = tag self.contents = contents self._attrs = attrs if attrs else [] diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 11bccc4..34cb1e9 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,13 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from collections import defaultdict import re from . import HTMLEntity, Node, Text from .extras import Parameter -from ..compat import range, str from ..utils import parse_anything __all__ = ["Template"] @@ -37,7 +34,7 @@ class Template(Node): """Represents a template in wikicode, like ``{{foo}}``.""" def __init__(self, name, params=None): - super(Template, self).__init__() + super().__init__() self.name = name if params: self._params = params diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 1c47c7b..b07eedc 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals from . import Node -from ..compat import str __all__ = ["Text"] @@ -31,7 +28,7 @@ class Text(Node): """Represents ordinary, unformatted text with no special properties.""" def __init__(self, value): - super(Text, self).__init__() + super().__init__() self.value = value def __unicode__(self): diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 265a100..98ae75f 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import Node -from ..compat import str from ..utils import parse_anything __all__ = ["Wikilink"] @@ -32,7 +29,7 @@ class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" def __init__(self, title, text=None): - super(Wikilink, self).__init__() + super().__init__() self.title = title self.text = text diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index dc769a2..fb1bf20 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -36,7 +35,7 @@ class ParserError(Exception): """ def __init__(self, extra): msg = "This is a bug and should be reported. Info: {}.".format(extra) - super(ParserError, self).__init__(msg) + super().__init__(msg) from .builder import Builder @@ -50,7 +49,7 @@ except ImportError: __all__ = ["use_c", "Parser", "ParserError"] -class Parser(object): +class Parser: """Represents a parser for wikicode. 
Actual parsing is a two-step process: first, the text is split up into a diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index f1b9689..1ae2150 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from . import tokens, ParserError -from ..compat import str from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from ..nodes.extras import Attribute, Parameter @@ -45,7 +42,7 @@ def _add_handler(token_type): return decorator -class Builder(object): +class Builder: """Builds a tree of nodes out of a sequence of tokens. To use, pass a list of :class:`.Token`\\ s to the :meth:`build` method. The diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index cac5250..b6d013e 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # diff --git a/mwparserfromhell/parser/ctokenizer/common.h b/mwparserfromhell/parser/ctokenizer/common.h index f3d51f4..22a6b81 100644 --- a/mwparserfromhell/parser/ctokenizer/common.h +++ b/mwparserfromhell/parser/ctokenizer/common.h @@ -23,7 +23,7 @@ SOFTWARE. #pragma once #ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html +#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html #endif #include @@ -34,10 +34,6 @@ SOFTWARE. /* Compatibility macros */ -#if PY_MAJOR_VERSION >= 3 -#define IS_PY3K -#endif - #ifndef uint64_t #define uint64_t unsigned PY_LONG_LONG #endif @@ -48,20 +44,8 @@ SOFTWARE. 
/* Unicode support macros */ -#if defined(IS_PY3K) && PY_MINOR_VERSION >= 3 -#define PEP_393 -#endif - -#ifdef PEP_393 -#define Unicode Py_UCS4 #define PyUnicode_FROM_SINGLE(chr) \ PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) -#else -#define Unicode Py_UNICODE -#define PyUnicode_FROM_SINGLE(chr) \ - PyUnicode_FromUnicode(&(chr), 1) -#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE -#endif /* Error handling macros */ @@ -85,13 +69,9 @@ extern PyObject* definitions; typedef struct { Py_ssize_t capacity; Py_ssize_t length; -#ifdef PEP_393 PyObject* object; int kind; void* data; -#else - Py_UNICODE* data; -#endif } Textbuffer; typedef struct { @@ -111,12 +91,8 @@ typedef struct Stack Stack; typedef struct { PyObject* object; /* base PyUnicodeObject object */ Py_ssize_t length; /* length of object, in code points */ -#ifdef PEP_393 int kind; /* object's kind value */ void* data; /* object's raw unicode buffer */ -#else - Py_UNICODE* buf; /* object's internal buffer */ -#endif } TokenizerInput; typedef struct avl_tree_node avl_tree; diff --git a/mwparserfromhell/parser/ctokenizer/tag_data.h b/mwparserfromhell/parser/ctokenizer/tag_data.h index c2e9303..7e8edcb 100644 --- a/mwparserfromhell/parser/ctokenizer/tag_data.h +++ b/mwparserfromhell/parser/ctokenizer/tag_data.h @@ -32,7 +32,7 @@ typedef struct { Textbuffer* pad_first; Textbuffer* pad_before_eq; Textbuffer* pad_after_eq; - Unicode quoter; + Py_UCS4 quoter; Py_ssize_t reset; } TagData; diff --git a/mwparserfromhell/parser/ctokenizer/textbuffer.c b/mwparserfromhell/parser/ctokenizer/textbuffer.c index 3fd129f..e37b7c3 100644 --- a/mwparserfromhell/parser/ctokenizer/textbuffer.c +++ b/mwparserfromhell/parser/ctokenizer/textbuffer.c @@ -29,23 +29,16 @@ SOFTWARE. /* Internal allocation function for textbuffers. 
*/ -static int internal_alloc(Textbuffer* self, Unicode maxchar) +static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) { self->capacity = INITIAL_CAPACITY; self->length = 0; -#ifdef PEP_393 self->object = PyUnicode_New(self->capacity, maxchar); if (!self->object) return -1; self->kind = PyUnicode_KIND(self->object); self->data = PyUnicode_DATA(self->object); -#else - (void) maxchar; // Unused - self->data = malloc(sizeof(Unicode) * self->capacity); - if (!self->data) - return -1; -#endif return 0; } @@ -55,11 +48,7 @@ static int internal_alloc(Textbuffer* self, Unicode maxchar) */ static void internal_dealloc(Textbuffer* self) { -#ifdef PEP_393 Py_DECREF(self->object); -#else - free(self->data); -#endif } /* @@ -67,7 +56,6 @@ static void internal_dealloc(Textbuffer* self) */ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) { -#ifdef PEP_393 PyObject *newobj; void *newdata; @@ -79,10 +67,6 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) Py_DECREF(self->object); self->object = newobj; self->data = newdata; -#else - if (!(self->data = realloc(self->data, sizeof(Unicode) * new_cap))) - return -1; -#endif self->capacity = new_cap; return 0; @@ -94,11 +78,9 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) Textbuffer* Textbuffer_new(TokenizerInput* text) { Textbuffer* self = malloc(sizeof(Textbuffer)); - Unicode maxchar = 0; + Py_UCS4 maxchar = 0; -#ifdef PEP_393 maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); -#endif if (!self) goto fail_nomem; @@ -127,11 +109,9 @@ void Textbuffer_dealloc(Textbuffer* self) */ int Textbuffer_reset(Textbuffer* self) { - Unicode maxchar = 0; + Py_UCS4 maxchar = 0; -#ifdef PEP_393 maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); -#endif internal_dealloc(self); if (internal_alloc(self, maxchar)) @@ -142,18 +122,14 @@ int Textbuffer_reset(Textbuffer* self) /* Write a Unicode codepoint to the given textbuffer. 
*/ -int Textbuffer_write(Textbuffer* self, Unicode code) +int Textbuffer_write(Textbuffer* self, Py_UCS4 code) { if (self->length >= self->capacity) { if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) return -1; } -#ifdef PEP_393 PyUnicode_WRITE(self->kind, self->data, self->length++, code); -#else - self->data[self->length++] = code; -#endif return 0; } @@ -163,13 +139,9 @@ int Textbuffer_write(Textbuffer* self, Unicode code) This function does not check for bounds. */ -Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) +Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) { -#ifdef PEP_393 return PyUnicode_READ(self->kind, self->data, index); -#else - return self->data[index]; -#endif } /* @@ -177,11 +149,7 @@ Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) */ PyObject* Textbuffer_render(Textbuffer* self) { -#ifdef PEP_393 return PyUnicode_FromKindAndData(self->kind, self->data, self->length); -#else - return PyUnicode_FromUnicode(self->data, self->length); -#endif } /* @@ -196,14 +164,9 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) return -1; } -#ifdef PEP_393 assert(self->kind == other->kind); memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, other->length * other->kind); -#else - memcpy(self->data + self->length, other->data, - other->length * sizeof(Unicode)); -#endif self->length = newlen; return 0; @@ -215,18 +178,12 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) void Textbuffer_reverse(Textbuffer* self) { Py_ssize_t i, end = self->length - 1; - Unicode tmp; + Py_UCS4 tmp; for (i = 0; i < self->length / 2; i++) { -#ifdef PEP_393 tmp = PyUnicode_READ(self->kind, self->data, i); PyUnicode_WRITE(self->kind, self->data, i, PyUnicode_READ(self->kind, self->data, end - i)); PyUnicode_WRITE(self->kind, self->data, end - i, tmp); -#else - tmp = self->data[i]; - self->data[i] = self->data[end - i]; - self->data[end - i] = tmp; -#endif } } diff --git 
a/mwparserfromhell/parser/ctokenizer/textbuffer.h b/mwparserfromhell/parser/ctokenizer/textbuffer.h index 35579fd..85b39bc 100644 --- a/mwparserfromhell/parser/ctokenizer/textbuffer.h +++ b/mwparserfromhell/parser/ctokenizer/textbuffer.h @@ -29,8 +29,8 @@ SOFTWARE. Textbuffer* Textbuffer_new(TokenizerInput*); void Textbuffer_dealloc(Textbuffer*); int Textbuffer_reset(Textbuffer*); -int Textbuffer_write(Textbuffer*, Unicode); -Unicode Textbuffer_read(Textbuffer*, Py_ssize_t); +int Textbuffer_write(Textbuffer*, Py_UCS4); +Py_UCS4 Textbuffer_read(Textbuffer*, Py_ssize_t); PyObject* Textbuffer_render(Textbuffer*); int Textbuffer_concat(Textbuffer*, Textbuffer*); void Textbuffer_reverse(Textbuffer*); diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index deac6c5..be7018b 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -52,7 +52,7 @@ static int Tokenizer_parse_tag(Tokenizer*); /* Determine whether the given code point is a marker. 
*/ -static int is_marker(Unicode this) +static int is_marker(Py_UCS4 this) { int i; @@ -442,7 +442,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) static const char* valid = URISCHEME; Textbuffer* buffer; PyObject* scheme; - Unicode this; + Py_UCS4 this; int slashes, i; if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0) @@ -463,7 +463,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) while (1) { if (!valid[i]) goto end_of_loop; - if (this == (Unicode) valid[i]) + if (this == (Py_UCS4) valid[i]) break; i++; } @@ -516,7 +516,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) static const char* valid = URISCHEME; Textbuffer *scheme_buffer = Textbuffer_new(&self->text); PyObject *scheme; - Unicode chunk; + Py_UCS4 chunk; Py_ssize_t i; int slashes, j; uint64_t new_context; @@ -536,7 +536,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) FAIL_ROUTE(0); return 0; } - } while (chunk != (Unicode) valid[j++]); + } while (chunk != (Py_UCS4) valid[j++]); Textbuffer_write(scheme_buffer, chunk); } end_of_loop: @@ -580,7 +580,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Handle text in a free external link, including trailing punctuation. */ static int Tokenizer_handle_free_link_text( - Tokenizer* self, int* parens, Textbuffer* tail, Unicode this) + Tokenizer* self, int* parens, Textbuffer* tail, Py_UCS4 this) { #define PUSH_TAIL_BUFFER(tail, error) \ if (tail && tail->length > 0) { \ @@ -607,10 +607,10 @@ static int Tokenizer_handle_free_link_text( Return whether the current head is the end of a free link. 
*/ static int -Tokenizer_is_free_link(Tokenizer* self, Unicode this, Unicode next) +Tokenizer_is_free_link(Tokenizer* self, Py_UCS4 this, Py_UCS4 next) { // Built from Tokenizer_parse()'s end sentinels: - Unicode after = Tokenizer_read(self, 2); + Py_UCS4 after = Tokenizer_read(self, 2); uint64_t ctx = self->topstack->context; return (!this || this == '\n' || this == '[' || this == ']' || @@ -628,7 +628,7 @@ static PyObject* Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, Textbuffer* extra) { - Unicode this, next; + Py_UCS4 this, next; int parens = 0; if (brackets ? Tokenizer_parse_bracketed_uri_scheme(self) : @@ -816,11 +816,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) if (!heading) { return -1; } -#ifdef IS_PY3K level = PyLong_FromSsize_t(heading->level); -#else - level = PyInt_FromSsize_t(heading->level); -#endif if (!level) { Py_DECREF(heading->title); free(heading); @@ -933,7 +929,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) static int Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *kwargs, *charobj, *textobj; - Unicode this; + Py_UCS4 this; int numeric, hexadecimal, i, j, zeroes, test; char *valid, *text, *buffer, *def; @@ -1014,7 +1010,7 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) while (1) { if (!valid[j]) FAIL_ROUTE_AND_EXIT() - if (this == (Unicode) valid[j]) + if (this == (Py_UCS4) valid[j]) break; j++; } @@ -1111,7 +1107,7 @@ static int Tokenizer_parse_comment(Tokenizer* self) { Py_ssize_t reset = self->head + 3; PyObject *comment; - Unicode this; + Py_UCS4 this; self->head += 4; if (Tokenizer_push(self, 0)) @@ -1211,7 +1207,7 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) Handle whitespace inside of an HTML open tag. 
*/ static int Tokenizer_handle_tag_space( - Tokenizer* self, TagData* data, Unicode text) + Tokenizer* self, TagData* data, Py_UCS4 text) { uint64_t ctx = data->context; uint64_t end_of_value = (ctx & TAG_ATTR_VALUE && @@ -1243,9 +1239,9 @@ static int Tokenizer_handle_tag_space( /* Handle regular text inside of an HTML open tag. */ -static int Tokenizer_handle_tag_text(Tokenizer* self, Unicode text) +static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UCS4 text) { - Unicode next = Tokenizer_read(self, 1); + Py_UCS4 next = Tokenizer_read(self, 1); if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self)) return Tokenizer_emit_char(self, text); @@ -1262,7 +1258,7 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Unicode text) Handle all sorts of text data inside of an HTML open tag. */ static int Tokenizer_handle_tag_data( - Tokenizer* self, TagData* data, Unicode chunk) + Tokenizer* self, TagData* data, Py_UCS4 chunk) { PyObject *trash; int first_time, escaped; @@ -1444,7 +1440,7 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) { Textbuffer* buffer; PyObject *buf_tmp, *end_tag, *start_tag; - Unicode this, next; + Py_UCS4 this, next; Py_ssize_t reset; int cmp; @@ -1600,7 +1596,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) { TagData *data = TagData_new(&self->text); PyObject *token, *text, *trash; - Unicode this, next; + Py_UCS4 this, next; int can_exit; if (!data) @@ -1686,7 +1682,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self) Py_ssize_t reset = self->head + 1, pos = 0; Textbuffer* buf; PyObject *name, *tag; - Unicode this; + Py_UCS4 this; self->head += 2; buf = Textbuffer_new(&self->text); @@ -1988,7 +1984,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self) static int Tokenizer_handle_list_marker(Tokenizer* self) { PyObject *kwargs, *markup; - Unicode code = Tokenizer_read(self, 0); + Py_UCS4 code = Tokenizer_read(self, 0); if (code == ';') self->topstack->context |= LC_DLTERM; @@ -2015,7 
+2011,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self) */ static int Tokenizer_handle_list(Tokenizer* self) { - Unicode marker = Tokenizer_read(self, 1); + Py_UCS4 marker = Tokenizer_read(self, 1); if (Tokenizer_handle_list_marker(self)) return -1; @@ -2169,11 +2165,11 @@ Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, /* Handle style attributes for a table until an ending token. */ -static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token) +static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Py_UCS4 end_token) { TagData *data = TagData_new(&self->text); PyObject *padding, *trash; - Unicode this; + Py_UCS4 this; int can_exit; if (!data) @@ -2483,7 +2479,7 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) everything is safe, or -1 if the route must be failed. */ static int -Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Unicode data) +Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UCS4 data) { if (context & LC_FAIL_NEXT) return -1; @@ -2568,7 +2564,7 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Unicode data) static int Tokenizer_has_leading_whitespace(Tokenizer* self) { int offset = 1; - Unicode current_character; + Py_UCS4 current_character; while (1) { current_character = Tokenizer_read_backwards(self, offset); if (!current_character || current_character == '\n') @@ -2586,7 +2582,7 @@ static int Tokenizer_has_leading_whitespace(Tokenizer* self) PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) { uint64_t this_context; - Unicode this, next, next_next, last; + Py_UCS4 this, next, next_next, last; PyObject* temp; if (push) { diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.h b/mwparserfromhell/parser/ctokenizer/tok_parse.h index 9d98b00..bdae573 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.h +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.h @@ -24,7 +24,7 @@ SOFTWARE. 
#include "common.h" -static const Unicode MARKERS[] = { +static const Py_UCS4 MARKERS[] = { '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', '-', '!', '\n', '\0'}; diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.c b/mwparserfromhell/parser/ctokenizer/tok_support.c index 30dc2a1..bf554f6 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_support.c +++ b/mwparserfromhell/parser/ctokenizer/tok_support.c @@ -275,7 +275,7 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, /* Write a Unicode codepoint to the current textbuffer. */ -int Tokenizer_emit_char(Tokenizer* self, Unicode code) +int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) { return Textbuffer_write(self->topstack->textbuffer, code); } @@ -389,19 +389,15 @@ int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) /* Internal function to read the codepoint at the given index from the input. */ -static Unicode read_codepoint(TokenizerInput* text, Py_ssize_t index) +static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) { -#ifdef PEP_393 return PyUnicode_READ(text->kind, text->data, index); -#else - return text->buf[index]; -#endif } /* Read the value at a relative point in the wikicode, forwards. */ -Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) +Py_UCS4 Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; @@ -413,7 +409,7 @@ Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) /* Read the value at a relative point in the wikicode, backwards. 
*/ -Unicode Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) +Py_UCS4 Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index; diff --git a/mwparserfromhell/parser/ctokenizer/tok_support.h b/mwparserfromhell/parser/ctokenizer/tok_support.h index f65d102..d08f5c4 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_support.h +++ b/mwparserfromhell/parser/ctokenizer/tok_support.h @@ -38,14 +38,14 @@ void Tokenizer_free_bad_route_tree(Tokenizer*); int Tokenizer_emit_token(Tokenizer*, PyObject*, int); int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); -int Tokenizer_emit_char(Tokenizer*, Unicode); +int Tokenizer_emit_char(Tokenizer*, Py_UCS4); int Tokenizer_emit_text(Tokenizer*, const char*); int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*); int Tokenizer_emit_all(Tokenizer*, PyObject*); int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); -Unicode Tokenizer_read(Tokenizer*, Py_ssize_t); -Unicode Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); +Py_UCS4 Tokenizer_read(Tokenizer*, Py_ssize_t); +Py_UCS4 Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); /* Macros */ diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.c b/mwparserfromhell/parser/ctokenizer/tokenizer.c index 24d0b4a..a501032 100644 --- a/mwparserfromhell/parser/ctokenizer/tokenizer.c +++ b/mwparserfromhell/parser/ctokenizer/tokenizer.c @@ -85,12 +85,8 @@ static void init_tokenizer_text(TokenizerInput* text) text->object = Py_None; Py_INCREF(Py_None); text->length = 0; -#ifdef PEP_393 text->kind = PyUnicode_WCHAR_KIND; text->data = NULL; -#else - text->buf = NULL; -#endif } /* @@ -119,14 +115,10 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input) dealloc_tokenizer_text(text); text->object = input; -#ifdef PEP_393 if (PyUnicode_READY(input) < 0) return -1; text->kind = PyUnicode_KIND(input); text->data = PyUnicode_DATA(input); -#else - text->buf = PyUnicode_AS_UNICODE(input); -#endif text->length = 
PyUnicode_GET_LENGTH(input); return 0; } @@ -192,11 +184,9 @@ static int load_entities(void) { PyObject *tempmod, *defmap, *deflist; unsigned numdefs, i; -#ifdef IS_PY3K PyObject *string; -#endif - tempmod = PyImport_ImportModule(ENTITYDEFS_MODULE); + tempmod = PyImport_ImportModule("html.entities"); if (!tempmod) return -1; defmap = PyObject_GetAttrString(tempmod, "entitydefs"); @@ -212,14 +202,10 @@ static int load_entities(void) if (!entitydefs) return -1; for (i = 0; i < numdefs; i++) { -#ifdef IS_PY3K string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); if (!string) return -1; entitydefs[i] = PyBytes_AsString(string); -#else - entitydefs[i] = PyBytes_AsString(PyList_GET_ITEM(deflist, i)); -#endif if (!entitydefs[i]) return -1; } @@ -233,7 +219,7 @@ static int load_tokens(void) *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("tokens"); + *modname = PyUnicode_FromString("tokens"); char *name = "mwparserfromhell.parser"; if (!fromlist || !modname) @@ -256,7 +242,7 @@ static int load_defs(void) *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("definitions"); + *modname = PyUnicode_FromString("definitions"); char *name = "mwparserfromhell"; if (!fromlist || !modname) @@ -277,7 +263,7 @@ static int load_exceptions(void) *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), *fromlist = PyList_New(1), - *modname = IMPORT_NAME_FUNC("parser"); + *modname = PyUnicode_FromString("parser"); char *name = "mwparserfromhell"; if (!fromlist || !modname) @@ -294,24 +280,22 @@ static int load_exceptions(void) return 0; } -PyMODINIT_FUNC INIT_FUNC_NAME(void) +PyMODINIT_FUNC PyInit__tokenizer(void) { PyObject *module; TokenizerType.tp_new = PyType_GenericNew; if (PyType_Ready(&TokenizerType) < 0) - INIT_ERROR; - module = CREATE_MODULE; + return NULL; + module = PyModule_Create(&module_def); if (!module) - INIT_ERROR; + 
return NULL; Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); Py_INCREF(Py_True); PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); NOARGS = PyTuple_New(0); if (!NOARGS || load_entities() || load_tokens() || load_defs()) - INIT_ERROR; -#ifdef IS_PY3K + return NULL; return module; -#endif } diff --git a/mwparserfromhell/parser/ctokenizer/tokenizer.h b/mwparserfromhell/parser/ctokenizer/tokenizer.h index 6050ce0..ac98d79 100644 --- a/mwparserfromhell/parser/ctokenizer/tokenizer.h +++ b/mwparserfromhell/parser/ctokenizer/tokenizer.h @@ -32,22 +32,6 @@ static void Tokenizer_dealloc(Tokenizer*); static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); -/* Compatibility macros */ - -#ifdef IS_PY3K - #define IMPORT_NAME_FUNC PyUnicode_FromString - #define CREATE_MODULE PyModule_Create(&module_def); - #define ENTITYDEFS_MODULE "html.entities" - #define INIT_FUNC_NAME PyInit__tokenizer - #define INIT_ERROR return NULL -#else - #define IMPORT_NAME_FUNC PyBytes_FromString - #define CREATE_MODULE Py_InitModule("_tokenizer", NULL); - #define ENTITYDEFS_MODULE "htmlentitydefs" - #define INIT_FUNC_NAME init_tokenizer - #define INIT_ERROR return -#endif - /* Structs */ static PyMethodDef Tokenizer_methods[] = { @@ -101,11 +85,9 @@ static PyTypeObject TokenizerType = { Tokenizer_new, /* tp_new */ }; -#ifdef IS_PY3K static PyModuleDef module_def = { PyModuleDef_HEAD_INIT, "_tokenizer", "Creates a list of tokens from a string of wikicode.", -1, NULL, NULL, NULL, NULL, NULL }; -#endif diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index f44360e..a95c477 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,12 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
IN THE # SOFTWARE. -from __future__ import unicode_literals +import html.entities as htmlentities from math import log import re from . import contexts, tokens, ParserError -from ..compat import htmlentities, range from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -35,11 +33,11 @@ class BadRoute(Exception): """Raised internally when the current tokenization route is invalid.""" def __init__(self, context=0): - super(BadRoute, self).__init__() + super().__init__() self.context = context -class _TagOpenData(object): +class _TagOpenData: """Stores data about an HTML open tag, like ````.""" CX_NAME = 1 << 0 CX_ATTR_READY = 1 << 1 @@ -57,7 +55,7 @@ class _TagOpenData(object): self.reset = 0 -class Tokenizer(object): +class Tokenizer: """Creates a list of tokens from a string of wikicode.""" USES_C = False START = object() diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 3110179..ec99c67 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -28,9 +27,6 @@ a syntactically valid form by the :class:`.Tokenizer`, and then converted into the :class`.Wikicode` tree by the :class:`.Builder`. 
""" -from __future__ import unicode_literals - -from ..compat import py3k, str __all__ = ["Token"] @@ -65,7 +61,7 @@ class Token(dict): def make(name): """Create a new Token class using ``type()`` and add it to ``__all__``.""" __all__.append(name) - return type(name if py3k else name.encode("utf8"), (Token,), {}) + return type(name, (Token,), {}) Text = make("Text") diff --git a/mwparserfromhell/smart_list/ListProxy.py b/mwparserfromhell/smart_list/ListProxy.py index 6d4b85c..35b45dc 100644 --- a/mwparserfromhell/smart_list/ListProxy.py +++ b/mwparserfromhell/smart_list/ListProxy.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan @@ -24,7 +23,6 @@ # SmartList has to be a full import in order to avoid cyclical import errors import mwparserfromhell.smart_list.SmartList from .utils import _SliceNormalizerMixIn, inheritdoc -from ..compat import py3k class _ListProxy(_SliceNormalizerMixIn, list): @@ -36,7 +34,7 @@ class _ListProxy(_SliceNormalizerMixIn, list): """ def __init__(self, parent, sliceinfo): - super(_ListProxy, self).__init__() + super().__init__() self._parent = parent self._sliceinfo = sliceinfo @@ -73,12 +71,8 @@ class _ListProxy(_SliceNormalizerMixIn, list): return self._render() >= list(other) return self._render() >= other - if py3k: - def __bool__(self): - return bool(self._render()) - else: - def __nonzero__(self): - return bool(self._render()) + def __bool__(self): + return bool(self._render()) def __len__(self): return max((self._stop - self._start) // self._step, 0) @@ -138,16 +132,6 @@ class _ListProxy(_SliceNormalizerMixIn, list): def __contains__(self, item): return item in self._render() - if not py3k: - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, iterable): - self.__setitem__(slice(start, stop), iterable) - - def __delslice__(self, start, stop): - self.__delitem__(slice(start, stop)) - def 
__add__(self, other): return mwparserfromhell.smart_list.SmartList(list(self) + other) @@ -237,27 +221,13 @@ class _ListProxy(_SliceNormalizerMixIn, list): item.reverse() self._parent[self._start:self._stop:self._step] = item - if py3k: - @inheritdoc - def sort(self, key=None, reverse=None): - item = self._render() - kwargs = {} - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item - else: - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item diff --git a/mwparserfromhell/smart_list/SmartList.py b/mwparserfromhell/smart_list/SmartList.py index 30d2b1e..c2e83a4 100644 --- a/mwparserfromhell/smart_list/SmartList.py +++ b/mwparserfromhell/smart_list/SmartList.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # @@ -25,7 +23,6 @@ from _weakref import ref from .ListProxy import _ListProxy from .utils import _SliceNormalizerMixIn, inheritdoc -from ..compat import py3k class SmartList(_SliceNormalizerMixIn, list): @@ -54,14 +51,14 @@ class SmartList(_SliceNormalizerMixIn, list): def __init__(self, iterable=None): if iterable: - super(SmartList, self).__init__(iterable) + super().__init__(iterable) else: - super(SmartList, self).__init__() + super().__init__() self._children = {} def __getitem__(self, key): if not isinstance(key, slice): - 
return super(SmartList, self).__getitem__(key) + return super().__getitem__(key) key = self._normalize_slice(key, clamp=False) sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) @@ -71,44 +68,32 @@ class SmartList(_SliceNormalizerMixIn, list): def __setitem__(self, key, item): if not isinstance(key, slice): - return super(SmartList, self).__setitem__(key, item) + return super().__setitem__(key, item) item = list(item) - super(SmartList, self).__setitem__(key, item) + super().__setitem__(key, item) key = self._normalize_slice(key, clamp=True) diff = len(item) + (key.start - key.stop) // key.step if not diff: return - values = self._children.values if py3k else self._children.itervalues - for child, (start, stop, step) in values(): + for child, (start, stop, step) in self._children.values(): if start > key.stop: self._children[id(child)][1][0] += diff if stop is not None and stop >= key.stop: self._children[id(child)][1][1] += diff def __delitem__(self, key): - super(SmartList, self).__delitem__(key) + super().__delitem__(key) if isinstance(key, slice): key = self._normalize_slice(key, clamp=True) else: key = slice(key, key + 1, 1) diff = (key.stop - key.start) // key.step - values = self._children.values if py3k else self._children.itervalues - for child, (start, stop, step) in values(): + for child, (start, stop, step) in self._children.values(): if start > key.start: self._children[id(child)][1][0] -= diff if stop is not None and stop >= key.stop: self._children[id(child)][1][1] -= diff - if not py3k: - def __getslice__(self, start, stop): - return self.__getitem__(slice(start, stop)) - - def __setslice__(self, start, stop, iterable): - self.__setitem__(slice(start, stop), iterable) - - def __delslice__(self, start, stop): - self.__delitem__(slice(start, stop)) - def __add__(self, other): return SmartList(list(self) + other) @@ -159,27 +144,14 @@ class SmartList(_SliceNormalizerMixIn, list): @inheritdoc def reverse(self): 
self._detach_children() - super(SmartList, self).reverse() - - if py3k: - @inheritdoc - def sort(self, key=None, reverse=None): - self._detach_children() - kwargs = {} - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - else: - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - self._detach_children() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) + super().reverse() + + @inheritdoc + def sort(self, key=None, reverse=None): + self._detach_children() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super().sort(**kwargs) diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py index 81d4fb1..fdf7bd8 100644 --- a/mwparserfromhell/smart_list/__init__.py +++ b/mwparserfromhell/smart_list/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan diff --git a/mwparserfromhell/smart_list/utils.py b/mwparserfromhell/smart_list/utils.py index 609b095..1a36d0b 100644 --- a/mwparserfromhell/smart_list/utils.py +++ b/mwparserfromhell/smart_list/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Copyright (C) 2012-2016 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # @@ -21,8 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals - from sys import maxsize __all__ = [] @@ -38,7 +34,7 @@ def inheritdoc(method): return method -class _SliceNormalizerMixIn(object): +class _SliceNormalizerMixIn: """MixIn that provides a private method to normalize slices.""" def _normalize_slice(self, key, clamp=False): diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 3664a09..564706d 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -22,14 +21,11 @@ """ This module contains the :class:`.StringMixIn` type, which implements the -interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. +interface for the ``str`` type in a dynamic manner. """ -from __future__ import unicode_literals from sys import getdefaultencoding -from .compat import bytes, py3k, str - __all__ = ["StringMixIn"] def inheritdoc(method): @@ -41,24 +37,20 @@ def inheritdoc(method): method.__doc__ = getattr(str, method.__name__).__doc__ return method -class StringMixIn(object): +class StringMixIn: """Implement the interface for ``unicode``/``str`` in a dynamic manner. To use this class, inherit from it and override the :meth:`__unicode__` - method (same on py3k) to return the string representation of the object. + method to return the string representation of the object. The various string methods will operate on the value of :meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. 
""" - if py3k: - def __str__(self): - return self.__unicode__() + def __str__(self): + return self.__unicode__() - def __bytes__(self): - return bytes(self.__unicode__(), getdefaultencoding()) - else: - def __str__(self): - return bytes(self.__unicode__()) + def __bytes__(self): + return bytes(self.__unicode__(), getdefaultencoding()) def __unicode__(self): raise NotImplementedError() @@ -84,19 +76,14 @@ class StringMixIn(object): def __ge__(self, other): return self.__unicode__() >= other - if py3k: - def __bool__(self): - return bool(self.__unicode__()) - else: - def __nonzero__(self): - return bool(self.__unicode__()) + def __bool__(self): + return bool(self.__unicode__()) def __len__(self): return len(self.__unicode__()) def __iter__(self): - for char in self.__unicode__(): - yield char + yield from self.__unicode__() def __getitem__(self, key): return self.__unicode__()[key] @@ -113,8 +100,7 @@ class StringMixIn(object): type(self).__name__, attr)) return getattr(self.__unicode__(), attr) - if py3k: - maketrans = str.maketrans # Static method can't rely on __getattr__ + maketrans = str.maketrans # Static method can't rely on __getattr__ del inheritdoc diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index d30a2da..9e5e14b 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -25,9 +24,7 @@ This module contains accessory functions for other parts of the library. Parser users generally won't need stuff from here. 
""" -from __future__ import unicode_literals -from .compat import bytes, str from .nodes import Node from .smart_list import SmartList diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 1a966e2..f72c26b 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,12 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals - import re from itertools import chain -from .compat import bytes, py3k, range, str from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from .smart_list.ListProxy import _ListProxy @@ -49,7 +45,7 @@ class Wikicode(StringMixIn): RECURSE_OTHERS = 2 def __init__(self, nodes): - super(Wikicode, self).__init__() + super().__init__() self._nodes = nodes def __unicode__(self): @@ -64,8 +60,7 @@ class Wikicode(StringMixIn): for code in node.__children__(): for child in code.nodes: sub = Wikicode._get_children(child, contexts, restrict, code) - for result in sub: - yield result + yield from sub @staticmethod def _slice_replace(code, index, old, new): @@ -253,7 +248,7 @@ class Wikicode(StringMixIn): self.ifilter(forcetype=ftype, *a, **kw)) make_filter = lambda ftype: (lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)) - for name, ftype in (meths.items() if py3k else meths.iteritems()): + for name, ftype in meths.items(): ifilter = make_ifilter(ftype) filter = make_filter(ftype) ifilter.__doc__ = doc.format(name, "ifilter", ftype) diff --git a/scripts/memtest.py b/scripts/memtest.py index 64e8c6b..f60e260 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -40,7 +40,6 @@ import sys import psutil -from mwparserfromhell.compat import py3k from mwparserfromhell.parser._tokenizer import CTokenizer if sys.version_info[0] == 2: @@ -88,8 +87,6 @@ class 
MemoryTest(object): def load_file(filename): with open(filename, "rU") as fp: text = fp.read() - if not py3k: - text = text.decode("utf8") name = path.split(filename)[1][:0-len(extension)] self._parse_file(name, text) diff --git a/setup.py b/setup.py index 97abef0..74f7567 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #! /usr/bin/env python -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2018 Ben Kurtovic # @@ -21,23 +20,20 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import print_function from distutils.errors import DistutilsError, CCompilerError from glob import glob from os import environ import sys -if ((sys.version_info[0] == 2 and sys.version_info[1] < 7) or - (sys.version_info[1] == 3 and sys.version_info[1] < 4)): - raise RuntimeError("mwparserfromhell needs Python 2.7 or 3.4+") +if sys.version_info[1] == 3 and sys.version_info[1] < 4: + raise RuntimeError("mwparserfromhell needs 3.4+") from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext from mwparserfromhell import __version__ -from mwparserfromhell.compat import py3k -with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: +with open("README.rst", encoding='utf-8') as fp: long_docs = fp.read() use_extension = True @@ -98,8 +94,6 @@ setup( "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", diff --git a/tests/__init__.py b/tests/__init__.py index 89907bf..e69de29 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 4d19dd4..f61cb10 100644 --- a/tests/_test_tokenizer.py +++ 
b/tests/_test_tokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,13 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import codecs from os import listdir, path import sys import warnings -from mwparserfromhell.compat import py3k, str from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder @@ -35,7 +32,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(object): +class TokenizerTestCase: """A base test case for tokenizers, whose tests are loaded dynamically. Subclassed along with unittest.TestCase to form TestPyTokenizer and @@ -60,8 +57,6 @@ class TokenizerTestCase(object): actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(expected, actual) - if not py3k: - inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] return inner diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index aba54d1..cdfbd3a 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from unittest import TestCase -from mwparserfromhell.compat import range from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter diff --git a/tests/compat.py b/tests/compat.py deleted file mode 100644 index d5b3fba..0000000 --- a/tests/compat.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Serves the same purpose as mwparserfromhell.compat, but only for objects -required by unit tests. This avoids unnecessary imports (like urllib) within -the main library. 
-""" - -from mwparserfromhell.compat import py3k - -if py3k: - from io import StringIO - from urllib.parse import urlencode - from urllib.request import urlopen - -else: - from StringIO import StringIO - from urllib import urlencode, urlopen diff --git a/tests/test_argument.py b/tests/test_argument.py index 16b4d0c..eaf8abe 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_attribute.py b/tests/test_attribute.py index e9f2528..b0d0e85 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Template from mwparserfromhell.nodes.extras import Attribute diff --git a/tests/test_builder.py b/tests/test_builder.py index 7343077..e5f43aa 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter @@ -428,9 +425,8 @@ class TestBuilder(TreeEqualityTestCase): [tokens.TagOpenOpen()] ] - func = self.assertRaisesRegex if py3k else self.assertRaisesRegexp msg = r"_handle_token\(\) got unexpected TemplateClose" - func(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) + self.assertRaisesRegex(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) for test in missing_closes: self.assertRaises(ParserError, self.builder.build, test) diff --git a/tests/test_comment.py b/tests/test_comment.py index cf2f14d..1024e60 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Comment from ._test_tree_equality import TreeEqualityTestCase diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 3552a02..f9b8d2f 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest try: diff --git a/tests/test_docs.py b/tests/test_docs.py index 8559493..2e78106 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,15 +19,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import print_function, unicode_literals import json +from io import StringIO import os import unittest +from urllib.parse import urlencode +from urllib.request import urlopen import mwparserfromhell -from mwparserfromhell.compat import py3k, str - -from .compat import StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -47,16 +45,10 @@ class TestDocs(unittest.TestCase): self.assertPrint(wikicode, "I has a template! {{foo|bar|baz|eggs=spam}} See it?") templates = wikicode.filter_templates() - if py3k: - self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") - else: - self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") + self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") template = templates[0] self.assertPrint(template.name, "foo") - if py3k: - self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") - else: - self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") + self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") self.assertPrint(template.get(1).value, "bar") self.assertPrint(template.get("eggs").value, "spam") @@ -64,21 +56,14 @@ class TestDocs(unittest.TestCase): """test a block of example code in the README""" text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" temps = mwparserfromhell.parse(text).filter_templates() - if py3k: - res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" - else: - res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" + res = 
"['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" self.assertPrint(temps, res) def test_readme_3(self): """test a block of example code in the README""" code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") - if py3k: - self.assertPrint(code.filter_templates(recursive=False), - "['{{foo|this {{includes a|template}}}}']") - else: - self.assertPrint(code.filter_templates(recursive=False), - "[u'{{foo|this {{includes a|template}}}}']") + self.assertPrint(code.filter_templates(recursive=False), + "['{{foo|this {{includes a|template}}}}']") foo = code.filter_templates(recursive=False)[0] self.assertPrint(foo.get(1).value, "this {{includes a|template}}") self.assertPrint(foo.get(1).value.filter_templates()[0], @@ -98,10 +83,7 @@ class TestDocs(unittest.TestCase): code.replace("{{uncategorized}}", "{{bar-stub}}") res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" self.assertPrint(code, res) - if py3k: - res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" - else: - res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" + res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" self.assertPrint(code.filter_templates(), res) text = str(code) res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. 
{{bar-stub}}" @@ -126,14 +108,14 @@ class TestDocs(unittest.TestCase): } try: raw = urlopen(url1, urlencode(data).encode("utf8")).read() - except IOError: + except OSError: self.skipTest("cannot continue because of unsuccessful web call") res = json.loads(raw.decode("utf8")) revision = res["query"]["pages"][0]["revisions"][0] text = revision["slots"]["main"]["content"] try: expected = urlopen(url2.format(title)).read().decode("utf8") - except IOError: + except OSError: self.skipTest("cannot continue because of unsuccessful web call") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_external_link.py b/tests/test_external_link.py index c70905a..48a7b82 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_heading.py b/tests/test_heading.py index e5ec470..46c6258 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index fc09fde..273ee21 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity from ._test_tree_equality import TreeEqualityTestCase, wrap diff --git a/tests/test_parameter.py b/tests/test_parameter.py index be09448..d53c7af 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter diff --git a/tests/test_parser.py b/tests/test_parser.py index 5b12a0e..22a76f6 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,11 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.compat import range from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index a4c9bc1..9fd0c3e 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2019 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest from mwparserfromhell.parser import contexts diff --git a/tests/test_roundtripping.py b/tests/test_roundtripping.py index 50f9c1f..9ecd5bd 100644 --- a/tests/test_roundtripping.py +++ b/tests/test_roundtripping.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest from ._test_tokenizer import TokenizerTestCase diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 8deddd5..16d99e7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,11 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals - import unittest -from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList from mwparserfromhell.smart_list.ListProxy import _ListProxy @@ -129,14 +125,9 @@ class TestSmartList(unittest.TestCase): list3 = builder([0, 2, 3, 4]) list4 = builder([0, 1, 2]) - if py3k: - self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEqual(b"\x00\x01\x02", bytes(list4)) - self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) - else: - self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) - self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) - self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) self.assertLess(list1, list3) self.assertLessEqual(list1, list3) @@ -264,12 +255,6 @@ class TestSmartList(unittest.TestCase): self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) self.assertEqual([5, 4, 3, 2, 2, 0], list1) - if not py3k: - func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 - list1.sort(cmp=func) - self.assertEqual([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=func, reverse=True) - self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 11ee6b7..673d5fa 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,12 +19,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, range, str from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): @@ -42,18 +39,16 @@ class TestStringMixIn(unittest.TestCase): def test_docs(self): """make sure the various methods of StringMixIn have docstrings""" methods = [ - "capitalize", "center", "count", "encode", "endswith", - "expandtabs", "find", "format", "index", "isalnum", "isalpha", - "isdecimal", "isdigit", "islower", "isnumeric", "isspace", - "istitle", "isupper", "join", "ljust", "lower", "lstrip", - "partition", "replace", "rfind", "rindex", "rjust", "rpartition", - "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", - "swapcase", "title", "translate", "upper", "zfill"] - if py3k: - methods.extend(["casefold", "format_map", "isidentifier", "isprintable", - "maketrans"]) - else: - methods.append("decode") + "capitalize", "casefold", "center", "count", "encode", "endswith", + "expandtabs", "find", "format", "format_map", "index", "isalnum", + "isalpha", "isdecimal", "isdigit", "isidentifier", "islower", + "isnumeric", "isprintable", "isspace", "istitle", "isupper", + "join", "ljust", "lower", "lstrip", "maketrans", "partition", + "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", + "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", + "title", "translate", "upper", "zfill" + ] + for meth in methods: expected = getattr("foo", meth).__doc__ actual = getattr(_FakeString("foo"), meth).__doc__ @@ -64,17 +59,11 @@ class TestStringMixIn(unittest.TestCase): fstr = _FakeString("fake string") self.assertEqual(str(fstr), "fake string") self.assertEqual(bytes(fstr), b"fake string") - if py3k: - self.assertEqual(repr(fstr), "'fake string'") - else: - self.assertEqual(repr(fstr), b"u'fake string'") + self.assertEqual(repr(fstr), "'fake string'") 
self.assertIsInstance(str(fstr), str) self.assertIsInstance(bytes(fstr), bytes) - if py3k: - self.assertIsInstance(repr(fstr), str) - else: - self.assertIsInstance(repr(fstr), bytes) + self.assertIsInstance(repr(fstr), str) def test_comparisons(self): """make sure comparison operators work""" @@ -179,14 +168,6 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(1, str1.count("r", 5, 9)) self.assertEqual(0, str1.count("r", 5, 7)) - if not py3k: - str2 = _FakeString("fo") - self.assertEqual(str1, str1.decode()) - actual = _FakeString("\\U00010332\\U0001033f\\U00010344") - self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) - self.assertRaises(UnicodeError, str2.decode, "punycode") - self.assertEqual("", str2.decode("punycode", "ignore")) - str3 = _FakeString("𐌲𐌿𐍄") actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) @@ -233,10 +214,9 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") - if py3k: - self.assertEqual("fake string", str1.format_map({})) - self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) - self.assertRaises(ValueError, str5.format_map, {0: "abc"}) + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertRaises(ValueError, str5.format_map, {0: "abc"}) self.assertEqual(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") @@ -269,11 +249,10 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str13.isdigit()) self.assertTrue(str14.isdigit()) - if py3k: - self.assertTrue(str9.isidentifier()) - self.assertTrue(str10.isidentifier()) - self.assertFalse(str11.isidentifier()) - self.assertFalse(str12.isidentifier()) + self.assertTrue(str9.isidentifier()) + self.assertTrue(str10.isidentifier()) + self.assertFalse(str11.isidentifier()) + self.assertFalse(str12.isidentifier()) 
str15 = _FakeString("") str16 = _FakeString("FooBar") @@ -286,13 +265,12 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str13.isnumeric()) self.assertTrue(str14.isnumeric()) - if py3k: - str16B = _FakeString("\x01\x02") - self.assertTrue(str9.isprintable()) - self.assertTrue(str13.isprintable()) - self.assertTrue(str14.isprintable()) - self.assertTrue(str15.isprintable()) - self.assertFalse(str16B.isprintable()) + str16B = _FakeString("\x01\x02") + self.assertTrue(str9.isprintable()) + self.assertTrue(str13.isprintable()) + self.assertTrue(str14.isprintable()) + self.assertTrue(str15.isprintable()) + self.assertFalse(str16B.isprintable()) str17 = _FakeString(" ") str18 = _FakeString("\t \t \r\n") @@ -323,10 +301,9 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str15.lower()) self.assertEqual("foobar", str16.lower()) self.assertEqual("ß", str22.lower()) - if py3k: - self.assertEqual("", str15.casefold()) - self.assertEqual("foobar", str16.casefold()) - self.assertEqual("ss", str22.casefold()) + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) str23 = _FakeString(" fake string ") self.assertEqual("fake string", str1.lstrip()) @@ -372,9 +349,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) - if py3k: - actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEqual(actual, str25.rsplit(maxsplit=3)) + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) self.assertEqual(" fake string", str23.rstrip()) @@ -390,9 +366,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, 
str25.split(" ", 3)) - if py3k: - actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEqual(actual, str25.split(maxsplit=3)) + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") self.assertEqual(["lines", "of", "text", "are", "presented", "here"], @@ -411,17 +386,13 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("Fake String", str1.title()) - if py3k: - table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", - 111: "4", 117: "5"}) - table2 = StringMixIn.maketrans("aeiou", "12345") - table3 = StringMixIn.maketrans("aeiou", "12345", "rts") - self.assertEqual("f1k2 str3ng", str1.translate(table1)) - self.assertEqual("f1k2 str3ng", str1.translate(table2)) - self.assertEqual("f1k2 3ng", str1.translate(table3)) - else: - table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} - self.assertEqual("f1k2 str3ng", str1.translate(table)) + table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", + 111: "4", 117: "5"}) + table2 = StringMixIn.maketrans("aeiou", "12345") + table3 = StringMixIn.maketrans("aeiou", "12345", "rts") + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) self.assertEqual("", str15.upper()) self.assertEqual("FOOBAR", str16.upper()) diff --git a/tests/test_tag.py b/tests/test_tag.py index c8c9808..860a94b 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text from mwparserfromhell.nodes.extras import Attribute from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext diff --git a/tests/test_template.py b/tests/test_template.py index e03a564..461371d 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2017 Ben Kurtovic # @@ -20,11 +19,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals from difflib import unified_diff import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell import parse diff --git a/tests/test_text.py b/tests/test_text.py index 4464418..94da937 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text class TestText(unittest.TestCase): diff --git a/tests/test_tokens.py b/tests/test_tokens.py index e766002..6ce28b5 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens class TestTokens(unittest.TestCase): @@ -64,14 +61,9 @@ class TestTokens(unittest.TestCase): hundredchars = ("earwig" * 100)[:97] + "..." self.assertEqual("Token()", repr(token1)) - if py3k: - token2repr1 = "Token(foo='bar', baz=123)" - token2repr2 = "Token(baz=123, foo='bar')" - token3repr = "Text(text='" + hundredchars + "')" - else: - token2repr1 = "Token(foo=u'bar', baz=123)" - token2repr2 = "Token(baz=123, foo=u'bar')" - token3repr = "Text(text=u'" + hundredchars + "')" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" + token3repr = "Text(text='" + hundredchars + "')" token2repr = repr(token2) self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) diff --git a/tests/test_utils.py b/tests/test_utils.py index b79b544..b8572fd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest from mwparserfromhell.nodes import Template, Text diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 307ee9a..9701865 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,13 +19,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from __future__ import unicode_literals from functools import partial import re from types import GeneratorType import unittest -from mwparserfromhell.compat import py3k, str from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 487b7af..1865b6e 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright (C) 2012-2016 Ben Kurtovic # @@ -20,10 +19,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext From 91ed26d86403dcbc51f831879ba5ceaf5df9ac80 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Mon, 6 Jan 2020 21:20:59 -0800 Subject: [PATCH 16/31] Set python_requires in setup.py to ">= 3.4" In addition to replacing the manual version check, this will also instruct pip to download an older version of mwparserfromhell for users running earlier Python versions rather than just getting something broken. 
--- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 74f7567..f339665 100644 --- a/setup.py +++ b/setup.py @@ -25,9 +25,6 @@ from glob import glob from os import environ import sys -if sys.version_info[1] == 3 and sys.version_info[1] < 4: - raise RuntimeError("mwparserfromhell needs 3.4+") - from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext @@ -80,6 +77,7 @@ setup( ext_modules = [tokenizer] if use_extension else [], test_suite = "tests", version = __version__, + python_requires = ">= 3.4", author = "Ben Kurtovic", author_email = "ben.kurtovic@gmail.com", url = "https://github.com/earwig/mwparserfromhell", From 8fdf75ccad1bc5efa535179a080a4bdc26f44760 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Sat, 8 Feb 2020 18:27:58 -0800 Subject: [PATCH 17/31] Use Github Actions to build manylinux1 wheels Just like the Windows wheels, these allow for Linux users to install mwparserfromhell and use the faster CTokenizer without needing to have build tools installed. Under the hood, this uses pypa manylinux1 docker image to build and tag the wheels, then publishes them to pypi if a new tag was pushed. Fixes #170. 
--- .github/workflows/build-linux-wheels.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/build-linux-wheels.yml diff --git a/.github/workflows/build-linux-wheels.yml b/.github/workflows/build-linux-wheels.yml new file mode 100644 index 0000000..c7a4f64 --- /dev/null +++ b/.github/workflows/build-linux-wheels.yml @@ -0,0 +1,25 @@ +name: Build manylinux1 wheels + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build manylinux1 Python wheels + uses: RalfG/python-wheels-manylinux-build@916aea1f70130a34995d0236ae5c67145bfd2c4f + with: + python-versions: 'cp34-cp34m cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38' + - name: Move to dist/ + run: | + mkdir -p dist + cp -v wheelhouse/*-manylinux1_x86_64.whl dist/ + - name: Publish package to PyPi + # Only actually publish if a new tag was pushed + if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.pypi_password }} + From 6ff88262aff2bfe99941a35d7cbde74fd362ca38 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 10 Feb 2020 23:34:51 -0500 Subject: [PATCH 18/31] Pointless changes to action --- .github/workflows/build-linux-wheels.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-linux-wheels.yml b/.github/workflows/build-linux-wheels.yml index c7a4f64..2056c8c 100644 --- a/.github/workflows/build-linux-wheels.yml +++ b/.github/workflows/build-linux-wheels.yml @@ -1,6 +1,6 @@ name: Build manylinux1 wheels -on: [push] +on: push jobs: build: @@ -8,18 +8,17 @@ jobs: steps: - uses: actions/checkout@v2 - name: Build manylinux1 Python wheels - uses: RalfG/python-wheels-manylinux-build@916aea1f70130a34995d0236ae5c67145bfd2c4f + uses: RalfG/python-wheels-manylinux-build@0c24cb31441c7a1e6ea90d6a6408d406b2fee279 with: python-versions: 'cp34-cp34m cp35-cp35m 
cp36-cp36m cp37-cp37m cp38-cp38' - name: Move to dist/ run: | mkdir -p dist cp -v wheelhouse/*-manylinux1_x86_64.whl dist/ - - name: Publish package to PyPi + - name: Publish package to PyPI # Only actually publish if a new tag was pushed if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@37e305e7413032d8422456179fee28fac7d25187 with: user: __token__ password: ${{ secrets.pypi_password }} - From 6e144d3657080b1bdab1b7f9c070306342989413 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Sun, 16 Feb 2020 23:20:38 -0800 Subject: [PATCH 19/31] Use Github Actions to build macOS wheels For 3.5+ on the latest macOS version. --- .github/workflows/build-macos-wheels.yml | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/build-macos-wheels.yml diff --git a/.github/workflows/build-macos-wheels.yml b/.github/workflows/build-macos-wheels.yml new file mode 100644 index 0000000..f2ab16e --- /dev/null +++ b/.github/workflows/build-macos-wheels.yml @@ -0,0 +1,33 @@ +name: Build macOS wheels + +on: [push] + +jobs: + build: + + runs-on: macos-latest + strategy: + matrix: + # macOS apparently doesn't support 3.4 + python-version: [3.5, 3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Build wheels + run: | + python -m pip install --upgrade pip wheel setuptools + pip wheel . 
-w dist/ + ls dist/ + - name: Publish package to PyPI + # Only actually publish if a new tag was pushed + if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + # We can't use the pypa action because of https://github.com/pypa/gh-action-pypi-publish/issues/15 + run: | + pip install twine + TWINE_USERNAME="__token__" \ + TWINE_PASSWORD="${{ secrets.pypi_password }}" \ + twine upload dist/* From f241829d3726010f32af8ad494bb7ba089baa7bf Mon Sep 17 00:00:00 2001 From: AntiCompositeNumber Date: Mon, 7 Sep 2020 15:44:11 -0400 Subject: [PATCH 20/31] Make nodes.template.Template a bit more dict-like (#252) * nodes: add a `default` param to Template.get Similar to dict.get, Template.get with a default param supplied will return that value instead of raising an exception. If default is unset, Template.get will keep its previous behavior and raise an exception. * nodes: Add __getitem__, __setitem__, and __delitem__ to Template These are just aliases for existing methods, without the ability to specify additional parameters. However, including them makes Template more dict-like, so it's a good idea to have them. * nodes: Use def instead of assignment of a lambda in Template Per PEP8, there is no benefit to using a lambda here, and some downsides. It's the same number of SLOC either way, so might as well change it.
--- mwparserfromhell/nodes/template.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 34cb1e9..9de7641 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -29,6 +29,8 @@ from ..utils import parse_anything __all__ = ["Template"] FLAGS = re.DOTALL | re.UNICODE +# Used to allow None as a valid fallback value +_UNSET = object() class Template(Node): """Represents a template in wikicode, like ``{{foo}}``.""" @@ -208,23 +210,30 @@ class Template(Node): return True return False - has_param = lambda self, name, ignore_empty=False: \ - self.has(name, ignore_empty) - has_param.__doc__ = "Alias for :meth:`has`." + def has_param(self, name, ignore_empty=False): + """Alias for :meth:`has`.""" + return self.has(name, ignore_empty) - def get(self, name): + def get(self, name, default=_UNSET): """Get the parameter whose name is *name*. The returned object is a :class:`.Parameter` instance. Raises - :exc:`ValueError` if no parameter has this name. Since multiple - parameters can have the same name, we'll return the last match, since - the last parameter is the only one read by the MediaWiki parser. + :exc:`ValueError` if no parameter has this name. If *default* is set, + returns that instead. Since multiple parameters can have the same name, + we'll return the last match, since the last parameter is the only one + read by the MediaWiki parser. 
""" name = str(name).strip() for param in reversed(self.params): if param.name.strip() == name: return param - raise ValueError(name) + if default is _UNSET: + raise ValueError(name) + else: + return default + + def __getitem__(self, name): + return self.get(name) def add(self, name, value, showkey=None, before=None, preserve_spacing=True): @@ -306,6 +315,9 @@ class Template(Node): self.params.append(param) return param + def __setitem__(self, name, value): + return self.add(name, value) + def remove(self, param, keep_field=False): """Remove a parameter from the template, identified by *param*. @@ -351,3 +363,6 @@ class Template(Node): raise ValueError(name) for i in reversed(to_remove): self.params.pop(i) + + def __delitem__(self, param): + return self.remove(param) From d650f827b38936b8594f45f291eeadc07f86a435 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 7 Sep 2020 16:01:54 -0400 Subject: [PATCH 21/31] Drop EOL Python 3.4 support --- .github/workflows/build-linux-wheels.yml | 2 +- .github/workflows/build-macos-wheels.yml | 5 +---- .travis.yml | 2 +- CHANGELOG | 1 + README.rst | 2 +- appveyor.yml | 8 -------- docs/changelog.rst | 1 + docs/index.rst | 2 +- setup.py | 3 +-- 9 files changed, 8 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build-linux-wheels.yml b/.github/workflows/build-linux-wheels.yml index 2056c8c..2148c6e 100644 --- a/.github/workflows/build-linux-wheels.yml +++ b/.github/workflows/build-linux-wheels.yml @@ -10,7 +10,7 @@ jobs: - name: Build manylinux1 Python wheels uses: RalfG/python-wheels-manylinux-build@0c24cb31441c7a1e6ea90d6a6408d406b2fee279 with: - python-versions: 'cp34-cp34m cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38' + python-versions: 'cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38' - name: Move to dist/ run: | mkdir -p dist diff --git a/.github/workflows/build-macos-wheels.yml b/.github/workflows/build-macos-wheels.yml index f2ab16e..83cb9f1 100644 --- a/.github/workflows/build-macos-wheels.yml +++ 
b/.github/workflows/build-macos-wheels.yml @@ -1,16 +1,13 @@ name: Build macOS wheels -on: [push] +on: push jobs: build: - runs-on: macos-latest strategy: matrix: - # macOS apparently doesn't support 3.4 python-version: [3.5, 3.6, 3.7, 3.8] - steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} diff --git a/.travis.yml b/.travis.yml index bee8152..23866e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,11 @@ dist: xenial language: python python: - - 3.4 - 3.5 - 3.6 - 3.7 - 3.8 + - 3.9-dev install: - pip install coveralls - python setup.py develop diff --git a/CHANGELOG b/CHANGELOG index 53b3548..3f9ca3a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ v0.6 (unreleased): - Added support for Python 3.8. +- Dropped support for end-of-life Python 3.4. - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) - Fixed a rare parsing bug involving deeply nested style tags. (#224) diff --git a/README.rst b/README.rst index 98af7a4..bbac7e6 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ mwparserfromhell **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 3.4+. +wikicode. It supports Python 3.5+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. 
diff --git a/appveyor.yml b/appveyor.yml index 2a4de47..ccbaf59 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,14 +22,6 @@ environment: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ matrix: - - PYTHON: "C:\\Python34" - PYTHON_VERSION: "3.4" - PYTHON_ARCH: "32" - - - PYTHON: "C:\\Python34-x64" - PYTHON_VERSION: "3.4" - PYTHON_ARCH: "64" - - PYTHON: "C:\\Python35" PYTHON_VERSION: "3.5" PYTHON_ARCH: "32" diff --git a/docs/changelog.rst b/docs/changelog.rst index 1ca7411..cf3ec8d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -8,6 +8,7 @@ Unreleased (`changes `__): - Added support for Python 3.8. +- Dropped support for end-of-life Python 3.4. - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (`#216 `_) - Fixed a rare parsing bug involving deeply nested style tags. diff --git a/docs/index.rst b/docs/index.rst index 1ca69f6..6d9fcf9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ MWParserFromHell v\ |version| Documentation :mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 3.4+. +wikicode. It supports Python 3.5+. Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. Development occurs on GitHub_. 
diff --git a/setup.py b/setup.py index f339665..d404ead 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,7 @@ setup( ext_modules = [tokenizer] if use_extension else [], test_suite = "tests", version = __version__, - python_requires = ">= 3.4", + python_requires = ">= 3.5", author = "Ben Kurtovic", author_email = "ben.kurtovic@gmail.com", url = "https://github.com/earwig/mwparserfromhell", @@ -93,7 +93,6 @@ setup( "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", From c1dabcfd6629c3d1ff38d68dffdd4cd87230d44e Mon Sep 17 00:00:00 2001 From: kishor kunal raj <68464660+kishorkunal-raj@users.noreply.github.com> Date: Wed, 9 Dec 2020 21:33:47 +0530 Subject: [PATCH 22/31] Adding ppc64le architecture support on travis-ci (#260) --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 23866e6..f352d3d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,9 @@ python: - 3.7 - 3.8 - 3.9-dev +arch: + - amd64 + - ppc64le install: - pip install coveralls - python setup.py develop From 786b08828b977cc58e65e3db58c4e1cc19cbabe4 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Sat, 19 Dec 2020 15:06:44 -0800 Subject: [PATCH 23/31] Build wheels for Python 3.9 too (#262) --- .github/workflows/build-linux-wheels.yml | 2 +- .github/workflows/build-macos-wheels.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-linux-wheels.yml b/.github/workflows/build-linux-wheels.yml index 2148c6e..067f84d 100644 --- a/.github/workflows/build-linux-wheels.yml +++ b/.github/workflows/build-linux-wheels.yml @@ -10,7 +10,7 @@ jobs: - name: Build manylinux1 Python wheels uses: RalfG/python-wheels-manylinux-build@0c24cb31441c7a1e6ea90d6a6408d406b2fee279 with: - python-versions: 
'cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38' + python-versions: 'cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39' - name: Move to dist/ run: | mkdir -p dist diff --git a/.github/workflows/build-macos-wheels.yml b/.github/workflows/build-macos-wheels.yml index 83cb9f1..5e93c1f 100644 --- a/.github/workflows/build-macos-wheels.yml +++ b/.github/workflows/build-macos-wheels.yml @@ -7,7 +7,7 @@ jobs: runs-on: macos-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8] + python-version: [3.5, 3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} From 237798a17eee319a713664f137dabc42a0362bdf Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 01:59:14 -0500 Subject: [PATCH 24/31] Update tag definitions --- mwparserfromhell/definitions.py | 76 ++++++++++++++++++------ mwparserfromhell/parser/ctokenizer/definitions.c | 72 ++++++++++++++++++---- 2 files changed, 120 insertions(+), 28 deletions(-) diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 6191dc6..0e70cc1 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -27,35 +27,77 @@ When updating this file, please also update the the C tokenizer version: - mwparserfromhell/parser/ctokenizer/definitions.h """ - __all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", "is_single_only", "is_scheme"] URI_SCHEMES = { - # [mediawiki/core.git]/includes/DefaultSettings.php @ 374a0ad943 - "http": True, "https": True, "ftp": True, "ftps": True, "ssh": True, - "sftp": True, "irc": True, "ircs": True, "xmpp": False, "sip": False, - "sips": False, "gopher": True, "telnet": True, "nntp": True, - "worldwind": True, "mailto": False, "tel": False, "sms": False, - "news": False, "svn": True, "git": True, "mms": True, "bitcoin": False, - "magnet": False, "urn": False, "geo": False + # [wikimedia/mediawiki.git]/includes/DefaultSettings.php @ 5c660de5d0 + "bitcoin": False, + "ftp": True, 
+ "ftps": True, + "geo": False, + "git": True, + "gopher": True, + "http": True, + "https": True, + "irc": True, + "ircs": True, + "magnet": False, + "mailto": False, + "mms": True, + "news": False, + "nntp": True, + "redis": True, + "sftp": True, + "sip": False, + "sips": False, + "sms": False, + "ssh": True, + "svn": True, + "tel": False, + "telnet": True, + "urn": False, + "worldwind": True, + "xmpp": False, } PARSER_BLACKLIST = [ - # enwiki extensions @ 2013-06-28 - "categorytree", "gallery", "hiero", "imagemap", "inputbox", "math", - "nowiki", "pre", "score", "section", "source", "syntaxhighlight", - "templatedata", "timeline" + # https://www.mediawiki.org/wiki/Parser_extension_tags @ 2020-12-21 + "categorytree", + "ce", + "chem", + "gallery", + "graph", + "hiero", + "imagemap", + "inputbox", + "math", + "nowiki", + "pre", + "score", + "section", + "source", + "syntaxhighlight", + "templatedata", + "timeline", ] INVISIBLE_TAGS = [ - # enwiki extensions @ 2013-06-28 - "categorytree", "gallery", "imagemap", "inputbox", "math", "score", - "section", "templatedata", "timeline" + # https://www.mediawiki.org/wiki/Parser_extension_tags @ 2020-12-21 + "categorytree", + "gallery", + "graph", + "imagemap", + "inputbox", + "math", + "score", + "section", + "templatedata", + "timeline" ] -# [mediawiki/core.git]/includes/Sanitizer.php @ 065bec63ea -SINGLE_ONLY = ["br", "hr", "meta", "link", "img", "wbr"] +# [wikimedia/mediawiki.git]/includes/parser/Sanitizer.php @ 95e17ee645 +SINGLE_ONLY = ["br", "wbr", "hr", "meta", "link", "img"] SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] MARKUP_TO_HTML = { diff --git a/mwparserfromhell/parser/ctokenizer/definitions.c b/mwparserfromhell/parser/ctokenizer/definitions.c index e247234..b1ff278 100644 --- a/mwparserfromhell/parser/ctokenizer/definitions.c +++ b/mwparserfromhell/parser/ctokenizer/definitions.c @@ -28,29 +28,79 @@ SOFTWARE. 
*/ static const char* URI_SCHEMES[] = { - "http", "https", "ftp", "ftps", "ssh", "sftp", "irc", "ircs", "xmpp", - "sip", "sips", "gopher", "telnet", "nntp", "worldwind", "mailto", "tel", - "sms", "news", "svn", "git", "mms", "bitcoin", "magnet", "urn", "geo", NULL + "bitcoin", + "ftp", + "ftps", + "geo", + "git", + "gopher", + "http", + "https", + "irc", + "ircs", + "magnet", + "mailto", + "mms", + "news", + "nntp", + "redis", + "sftp", + "sip", + "sips", + "sms", + "ssh", + "svn", + "tel", + "telnet", + "urn", + "worldwind", + "xmpp", + NULL, }; static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { - "xmpp", "sip", "sips", "mailto", "tel", "sms", "news", "bitcoin", "magnet", - "urn", "geo", NULL + "bitcoin", + "geo", + "magnet", + "mailto", + "news", + "sip", + "sips", + "sms", + "tel", + "urn", + "xmpp", + NULL, }; static const char* PARSER_BLACKLIST[] = { - "categorytree", "gallery", "hiero", "imagemap", "inputbox", "math", - "nowiki", "pre", "score", "section", "source", "syntaxhighlight", - "templatedata", "timeline", NULL + "categorytree", + "ce", + "chem", + "gallery", + "graph", + "hiero", + "imagemap", + "inputbox", + "math", + "nowiki", + "pre", + "score", + "section", + "source", + "syntaxhighlight", + "templatedata", + "timeline", + NULL, }; static const char* SINGLE[] = { - "br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", - "wbr", NULL + "br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", + "tr", NULL }; static const char* SINGLE_ONLY[] = { - "br", "hr", "meta", "link", "img", "wbr", NULL + "br", "wbr", "hr", "meta", "link", "img", NULL }; /* From 1c983d373863831f662ddf7b8b2ff12c58027b03 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 03:26:54 -0500 Subject: [PATCH 25/31] Assorted cleanup, linter fixes, and improvements for Python 3 --- docs/api/mwparserfromhell.nodes.rst | 8 ++ docs/api/mwparserfromhell.parser.rst | 7 + docs/api/mwparserfromhell.rst | 16 +-- 
docs/api/mwparserfromhell.smart_list.rst | 30 +++++ docs/conf.py | 2 +- mwparserfromhell/__init__.py | 4 +- mwparserfromhell/definitions.py | 2 +- mwparserfromhell/nodes/__init__.py | 41 +----- mwparserfromhell/nodes/_base.py | 51 +++++++ mwparserfromhell/nodes/argument.py | 6 +- mwparserfromhell/nodes/comment.py | 6 +- mwparserfromhell/nodes/external_link.py | 7 +- mwparserfromhell/nodes/extras/attribute.py | 4 +- mwparserfromhell/nodes/extras/parameter.py | 4 +- mwparserfromhell/nodes/heading.py | 6 +- mwparserfromhell/nodes/html_entity.py | 25 ++-- mwparserfromhell/nodes/tag.py | 13 +- mwparserfromhell/nodes/template.py | 24 ++-- mwparserfromhell/nodes/text.py | 6 +- mwparserfromhell/nodes/wikilink.py | 6 +- mwparserfromhell/parser/__init__.py | 16 +-- mwparserfromhell/parser/builder.py | 13 +- mwparserfromhell/parser/ctokenizer/definitions.c | 2 +- mwparserfromhell/parser/errors.py | 34 +++++ mwparserfromhell/parser/tokenizer.py | 146 +++++++++++---------- mwparserfromhell/parser/tokens.py | 3 +- mwparserfromhell/smart_list/__init__.py | 7 +- .../smart_list/{ListProxy.py => list_proxy.py} | 29 ++-- .../smart_list/{SmartList.py => smart_list.py} | 18 +-- mwparserfromhell/string_mixin.py | 47 ++++--- mwparserfromhell/utils.py | 41 +++--- mwparserfromhell/wikicode.py | 32 ++--- scripts/memtest.py | 18 +-- setup.py | 2 +- tests/_test_tokenizer.py | 9 +- tests/_test_tree_equality.py | 3 +- tests/test_argument.py | 6 +- tests/test_attribute.py | 6 +- tests/test_comment.py | 6 +- tests/test_docs.py | 8 +- tests/test_external_link.py | 6 +- tests/test_heading.py | 6 +- tests/test_html_entity.py | 8 +- tests/test_parameter.py | 9 +- tests/test_smart_list.py | 23 ++-- tests/test_string_mixin.py | 6 +- tests/test_tag.py | 8 +- tests/test_template.py | 6 +- tests/test_text.py | 6 +- tests/test_tokens.py | 11 +- tests/test_wikicode.py | 9 +- tests/test_wikilink.py | 6 +- 52 files changed, 440 insertions(+), 378 deletions(-) create mode 100644 
docs/api/mwparserfromhell.smart_list.rst create mode 100644 mwparserfromhell/nodes/_base.py create mode 100644 mwparserfromhell/parser/errors.py rename mwparserfromhell/smart_list/{ListProxy.py => list_proxy.py} (89%) rename mwparserfromhell/smart_list/{SmartList.py => smart_list.py} (91%) diff --git a/docs/api/mwparserfromhell.nodes.rst b/docs/api/mwparserfromhell.nodes.rst index 2cbaa1c..38058f2 100644 --- a/docs/api/mwparserfromhell.nodes.rst +++ b/docs/api/mwparserfromhell.nodes.rst @@ -9,6 +9,14 @@ nodes Package .. autoclass:: mwparserfromhell.nodes.Node :special-members: +:mod:`_base` Module +---------------------- + +.. automodule:: mwparserfromhell.nodes._base + :members: + :undoc-members: + :show-inheritance: + :mod:`argument` Module ---------------------- diff --git a/docs/api/mwparserfromhell.parser.rst b/docs/api/mwparserfromhell.parser.rst index c7c8639..72ee9eb 100644 --- a/docs/api/mwparserfromhell.parser.rst +++ b/docs/api/mwparserfromhell.parser.rst @@ -23,6 +23,13 @@ parser Package :members: :undoc-members: +:mod:`errors` Module +-------------------- + +.. automodule:: mwparserfromhell.parser.errors + :members: + :undoc-members: + :mod:`tokenizer` Module ----------------------- diff --git a/docs/api/mwparserfromhell.rst b/docs/api/mwparserfromhell.rst index 63af111..c0bdc88 100644 --- a/docs/api/mwparserfromhell.rst +++ b/docs/api/mwparserfromhell.rst @@ -8,27 +8,12 @@ mwparserfromhell Package :members: :undoc-members: -:mod:`compat` Module --------------------- - -.. automodule:: mwparserfromhell.compat - :members: - :undoc-members: - :mod:`definitions` Module ------------------------- .. automodule:: mwparserfromhell.definitions :members: -:mod:`smart_list` Module ------------------------- - -.. 
automodule:: mwparserfromhell.smart_list - :members: SmartList, _ListProxy - :undoc-members: - :show-inheritance: - :mod:`string_mixin` Module -------------------------- @@ -58,3 +43,4 @@ Subpackages mwparserfromhell.nodes mwparserfromhell.parser + mwparserfromhell.smart_list diff --git a/docs/api/mwparserfromhell.smart_list.rst b/docs/api/mwparserfromhell.smart_list.rst new file mode 100644 index 0000000..9312374 --- /dev/null +++ b/docs/api/mwparserfromhell.smart_list.rst @@ -0,0 +1,30 @@ +smart_list Package +================== + +:mod:`smart_list` Package +------------------------- + +.. automodule:: mwparserfromhell.smart_list + :members: + :undoc-members: + +:mod:`list_proxy` Module +--------------------- + +.. automodule:: mwparserfromhell.smart_list.list_proxy + :members: + :undoc-members: + +:mod:`smart_list` Module +--------------------- + +.. automodule:: mwparserfromhell.smart_list.smart_list + :members: + :undoc-members: + +:mod:`utils` Module +--------------------- + +.. automodule:: mwparserfromhell.smart_list.utils + :members: + :undoc-members: diff --git a/docs/conf.py b/docs/conf.py index 9666cd0..9946f3b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. 
project = u'mwparserfromhell' -copyright = u'2012–2019 Ben Kurtovic' +copyright = u'2012–2020 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 6056b83..609999b 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,7 +26,7 @@ outrageously powerful parser for `MediaWiki `_ wikico """ __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012-2019 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012-2020 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.6.dev0" __email__ = "ben.kurtovic@gmail.com" diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 0e70cc1..bd0e969 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 6aa6ea4..4c29a5b 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,42 +28,8 @@ the name of a :class:`.Template` is a :class:`.Wikicode` object that can 
contain text or more templates. """ - -from ..string_mixin import StringMixIn - -__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", - "Node", "Tag", "Template", "Text", "Wikilink"] - -class Node(StringMixIn): - """Represents the base Node type, demonstrating the methods to override. - - :meth:`__unicode__` must be overridden. It should return a ``unicode`` or - (``str`` in py3k) representation of the node. If the node contains - :class:`.Wikicode` objects inside of it, :meth:`__children__` should be a - generator that iterates over them. If the node is printable - (shown when the page is rendered), :meth:`__strip__` should return its - printable version, stripping out any formatting marks. It does not have to - return a string, but something that can be converted to a string with - ``str()``. Finally, :meth:`__showtree__` can be overridden to build a - nice tree representation of the node, if desired, for - :meth:`~.Wikicode.get_tree`. - """ - def __unicode__(self): - raise NotImplementedError() - - def __children__(self): - return - # pylint: disable=unreachable - yield # pragma: no cover (this is a generator that yields nothing) - - def __strip__(self, **kwargs): - return None - - def __showtree__(self, write, get, mark): - write(str(self)) - - from . 
import extras +from ._base import Node from .text import Text from .argument import Argument from .comment import Comment @@ -73,3 +39,6 @@ from .html_entity import HTMLEntity from .tag import Tag from .template import Template from .wikilink import Wikilink + +__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", + "Node", "Tag", "Template", "Text", "Wikilink"] diff --git a/mwparserfromhell/nodes/_base.py b/mwparserfromhell/nodes/_base.py new file mode 100644 index 0000000..e4a3c2e --- /dev/null +++ b/mwparserfromhell/nodes/_base.py @@ -0,0 +1,51 @@ +# +# Copyright (C) 2012-2020 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from ..string_mixin import StringMixIn + +__all__ = ["Node"] + +class Node(StringMixIn): + """Represents the base Node type, demonstrating the methods to override. + + :meth:`__str__` must be overridden. It should return a ``str`` + representation of the node. 
If the node contains :class:`.Wikicode` + objects inside of it, :meth:`__children__` should be a generator that + iterates over them. If the node is printable (shown when the page is + rendered), :meth:`__strip__` should return its printable version, + stripping out any formatting marks. It does not have to return a string, + but something that can be converted to a string with ``str()``. Finally, + :meth:`__showtree__` can be overridden to build a nice tree representation + of the node, if desired, for :meth:`~.Wikicode.get_tree`. + """ + def __str__(self): + raise NotImplementedError() + + def __children__(self): + return + # pylint: disable=unreachable + yield # pragma: no cover (this is a generator that yields nothing) + + def __strip__(self, **kwargs): + return None + + def __showtree__(self, write, get, mark): + write(str(self)) diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 4d9d613..a852a65 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . 
import Node +from ._base import Node from ..utils import parse_anything __all__ = ["Argument"] @@ -33,7 +33,7 @@ class Argument(Node): self.name = name self.default = default - def __unicode__(self): + def __str__(self): start = "{{{" + str(self.name) if self.default is not None: return start + "|" + str(self.default) + "}}}" diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 302699e..56b05b7 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node __all__ = ["Comment"] @@ -31,7 +31,7 @@ class Comment(Node): super().__init__() self.contents = contents - def __unicode__(self): + def __str__(self): return "" @property diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 4dc3594..ba86659 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . 
import Node +from ._base import Node from ..utils import parse_anything __all__ = ["ExternalLink"] @@ -34,7 +34,7 @@ class ExternalLink(Node): self.title = title self.brackets = brackets - def __unicode__(self): + def __str__(self): if self.brackets: if self.title is not None: return "[" + str(self.url) + " " + str(self.title) + "]" @@ -79,6 +79,7 @@ class ExternalLink(Node): @url.setter def url(self, value): + # pylint: disable=import-outside-toplevel from ..parser import contexts self._url = parse_anything(value, contexts.EXT_LINK_URI) diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 38d2423..442c3ac 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -44,7 +44,7 @@ class Attribute(StringMixIn): self.pad_before_eq = pad_before_eq self.pad_after_eq = pad_after_eq - def __unicode__(self): + def __str__(self): result = self.pad_first + str(self.name) + self.pad_before_eq if self.value is not None: result += "=" + self.pad_after_eq diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 4478084..9287e00 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -41,7 +41,7 @@ class Parameter(StringMixIn): self.value = value self.showkey = showkey - def __unicode__(self): + def __str__(self): if self.showkey: return str(self.name) + "=" + 
str(self.value) return str(self.value) diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 1fe8790..de4dc70 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node from ..utils import parse_anything __all__ = ["Heading"] @@ -33,7 +33,7 @@ class Heading(Node): self.title = title self.level = level - def __unicode__(self): + def __str__(self): return ("=" * self.level) + str(self.title) + ("=" * self.level) def __children__(self): diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 8a2eef4..7371f2e 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ import html.entities as htmlentities -from . 
import Node +from ._base import Node __all__ = ["HTMLEntity"] @@ -49,7 +49,7 @@ class HTMLEntity(Node): self._hexadecimal = hexadecimal self._hex_char = hex_char - def __unicode__(self): + def __str__(self): if self.named: return "&{};".format(self.value) if self.hexadecimal: @@ -98,21 +98,22 @@ class HTMLEntity(Node): int(newval) except ValueError: try: - int(newval, 16) + intval = int(newval, 16) except ValueError: if newval not in htmlentities.entitydefs: - raise ValueError("entity value is not a valid name") + raise ValueError(f"entity value {newval!r} is not a valid name") from None self._named = True self._hexadecimal = False else: - if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: - raise ValueError("entity value is not in range(0x110000)") + if intval < 0 or intval > 0x10FFFF: + raise ValueError( + f"entity value 0x{intval:x} is not in range(0x110000)") from None self._named = False self._hexadecimal = True else: test = int(newval, 16 if self.hexadecimal else 10) if test < 0 or test > 0x10FFFF: - raise ValueError("entity value is not in range(0x110000)") + raise ValueError(f"entity value {test} is not in range(0x110000)") self._named = False self._value = newval @@ -120,13 +121,13 @@ class HTMLEntity(Node): def named(self, newval): newval = bool(newval) if newval and self.value not in htmlentities.entitydefs: - raise ValueError("entity value is not a valid name") + raise ValueError(f"entity value {self.value!r} is not a valid name") if not newval: try: int(self.value, 16) - except ValueError: - err = "current entity value is not a valid Unicode codepoint" - raise ValueError(err) + except ValueError as exc: + raise ValueError(f"current entity value {self.value!r} " + f"is not a valid Unicode codepoint") from exc self._named = newval @hexadecimal.setter diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 9fa45c5..094853b 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,5 +1,5 @@ # -# 
Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node from .extras import Attribute from ..definitions import is_visible from ..utils import parse_anything @@ -50,7 +50,7 @@ class Tag(Node): if closing_wiki_markup is not None: self.closing_wiki_markup = closing_wiki_markup - def __unicode__(self): + def __str__(self): if self.wiki_markup: if self.attributes: attrs = "".join([str(attr) for attr in self.attributes]) @@ -60,10 +60,9 @@ class Tag(Node): separator = self.wiki_style_separator or "" if self.self_closing: return self.wiki_markup + attrs + padding + separator - else: - close = self.closing_wiki_markup or "" - return self.wiki_markup + attrs + padding + separator + \ - str(self.contents) + close + close = self.closing_wiki_markup or "" + return self.wiki_markup + attrs + padding + separator + \ + str(self.contents) + close result = (" +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,9 @@ from collections import defaultdict import re -from . 
import HTMLEntity, Node, Text +from ._base import Node +from .html_entity import HTMLEntity +from .text import Text from .extras import Parameter from ..utils import parse_anything @@ -43,12 +45,11 @@ class Template(Node): else: self._params = [] - def __unicode__(self): + def __str__(self): if self.params: params = "|".join([str(param) for param in self.params]) return "{{" + str(self.name) + "|" + params + "}}" - else: - return "{{" + str(self.name) + "}}" + return "{{" + str(self.name) + "}}" def __children__(self): yield self.name @@ -102,6 +103,7 @@ class Template(Node): confidence = float(best) / sum(values) if confidence > 0.5: return tuple(theories.keys())[values.index(best)] + return None @staticmethod def _blank_param_value(value): @@ -229,8 +231,7 @@ class Template(Node): return param if default is _UNSET: raise ValueError(name) - else: - return default + return default def __getitem__(self, name): return self.get(name) @@ -339,19 +340,20 @@ class Template(Node): hidden name, if it exists, or the first instance). 
""" if isinstance(param, Parameter): - return self._remove_exact(param, keep_field) + self._remove_exact(param, keep_field) + return name = str(param).strip() removed = False to_remove = [] - for i, param in enumerate(self.params): - if param.name.strip() == name: + for i, par in enumerate(self.params): + if par.name.strip() == name: if keep_field: if self._should_remove(i, name): to_remove.append(i) else: - self._blank_param_value(param.value) + self._blank_param_value(par.value) keep_field = False else: self._fix_dependendent_params(i) diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index b07eedc..cce670c 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node __all__ = ["Text"] @@ -31,7 +31,7 @@ class Text(Node): super().__init__() self.value = value - def __unicode__(self): + def __str__(self): return self.value def __strip__(self, **kwargs): diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 98ae75f..fc78833 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . 
import Node +from ._base import Node from ..utils import parse_anything __all__ = ["Wikilink"] @@ -33,7 +33,7 @@ class Wikilink(Node): self.title = title self.text = text - def __unicode__(self): + def __str__(self): if self.text is not None: return "[[" + str(self.title) + "|" + str(self.text) + "]]" return "[[" + str(self.title) + "]]" diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index fb1bf20..cde45c5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,20 +25,8 @@ modules: the :mod:`.tokenizer` and the :mod:`.builder`. This module joins them together into one interface. """ -class ParserError(Exception): - """Exception raised when an internal error occurs while parsing. - - This does not mean that the wikicode was invalid, because invalid markup - should still be parsed correctly. This means that the parser caught itself - with an impossible internal state and is bailing out before other problems - can happen. Its appearance indicates a bug. - """ - def __init__(self, extra): - msg = "This is a bug and should be reported. 
Info: {}.".format(extra) - super().__init__(msg) - - from .builder import Builder +from .errors import ParserError try: from ._tokenizer import CTokenizer use_c = True diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 1ae2150..4c14b2a 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ # SOFTWARE. -from . import tokens, ParserError +from . import tokens +from .errors import ParserError from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from ..nodes.extras import Attribute, Parameter @@ -198,8 +199,7 @@ class Builder: if isinstance(token, tokens.HeadingEnd): title = self._pop() return Heading(title, level) - else: - self._write(self._handle_token(token)) + self._write(self._handle_token(token)) raise ParserError("_handle_heading() missed a close token") @_add_handler(tokens.CommentStart) @@ -211,8 +211,7 @@ class Builder: if isinstance(token, tokens.CommentEnd): contents = self._pop() return Comment(contents) - else: - self._write(self._handle_token(token)) + self._write(self._handle_token(token)) raise ParserError("_handle_comment() missed a close token") def _handle_attribute(self, start): @@ -283,7 +282,7 @@ class Builder: return _HANDLERS[type(token)](self, token) except KeyError: err = "_handle_token() got unexpected {0}" - raise ParserError(err.format(type(token).__name__)) + raise ParserError(err.format(type(token).__name__)) from None def build(self, tokenlist): """Build a Wikicode object from a list tokens and return it.""" diff --git a/mwparserfromhell/parser/ctokenizer/definitions.c b/mwparserfromhell/parser/ctokenizer/definitions.c index 
b1ff278..323d8a1 100644 --- a/mwparserfromhell/parser/ctokenizer/definitions.c +++ b/mwparserfromhell/parser/ctokenizer/definitions.c @@ -1,5 +1,5 @@ /* -Copyright (C) 2012-2016 Ben Kurtovic +Copyright (C) 2012-2020 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/errors.py b/mwparserfromhell/parser/errors.py new file mode 100644 index 0000000..adf3d5d --- /dev/null +++ b/mwparserfromhell/parser/errors.py @@ -0,0 +1,34 @@ +# +# Copyright (C) 2012-2020 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +__all__ = ["ParserError"] + +class ParserError(Exception): + """Exception raised when an internal error occurs while parsing. + + This does not mean that the wikicode was invalid, because invalid markup + should still be parsed correctly. 
This means that the parser caught itself + with an impossible internal state and is bailing out before other problems + can happen. Its appearance indicates a bug. + """ + def __init__(self, extra): + msg = "This is a bug and should be reported. Info: {}.".format(extra) + super().__init__(msg) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a95c477..93b79d9 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,8 @@ import html.entities as htmlentities from math import log import re -from . import contexts, tokens, ParserError +from . import contexts, tokens +from .errors import ParserError from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -323,7 +324,7 @@ class Tokenizer: self._head += 2 try: # If the wikilink looks like an external link, parse it as such: - link, extra, delta = self._really_parse_external_link(True) + link, _extra, _delta = self._really_parse_external_link(True) except BadRoute: self._head = reset + 1 try: @@ -433,17 +434,17 @@ class Tokenizer: self._emit_text(this) return punct, tail - def _is_free_link_end(self, this, next): + def _is_free_link_end(self, this, nxt): """Return whether the current head is the end of a free link.""" # Built from _parse()'s end sentinels: after, ctx = self._read(2), self._context equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING return (this in (self.END, "\n", "[", "]", "<", ">") or - this == next == "'" or + this == nxt == "'" or (this == "|" and ctx & contexts.TEMPLATE) or (this == "=" and ctx & equal_sign_contexts) or - (this == next == "}" and ctx & contexts.TEMPLATE) or - (this == next == after 
== "}" and ctx & contexts.ARGUMENT)) + (this == nxt == "}" and ctx & contexts.TEMPLATE) or + (this == nxt == after == "}" and ctx & contexts.ARGUMENT)) def _really_parse_external_link(self, brackets): """Really parse an external link.""" @@ -458,23 +459,23 @@ class Tokenizer: self._fail_route() tail = "" while True: - this, next = self._read(), self._read(1) + this, nxt = self._read(), self._read(1) if this == "&": if tail: self._emit_text(tail) tail = "" self._parse_entity() - elif (this == "<" and next == "!" and self._read(2) == + elif (this == "<" and nxt == "!" and self._read(2) == self._read(3) == "-"): if tail: self._emit_text(tail) tail = "" self._parse_comment() - elif not brackets and self._is_free_link_end(this, next): + elif not brackets and self._is_free_link_end(this, nxt): return self._pop(), tail, -1 elif this is self.END or this == "\n": self._fail_route() - elif this == next == "{" and self._can_recurse(): + elif this == nxt == "{" and self._can_recurse(): if tail: self._emit_text(tail) tail = "" @@ -702,12 +703,12 @@ class Tokenizer: def _handle_tag_text(self, text): """Handle regular *text* inside of an HTML open tag.""" - next = self._read(1) + nxt = self._read(1) if not self._can_recurse() or text not in self.MARKERS: self._emit_text(text) - elif text == next == "{": + elif text == nxt == "{": self._parse_template_or_argument() - elif text == next == "[": + elif text == nxt == "[": self._parse_wikilink() elif text == "<": self._parse_tag() @@ -796,10 +797,10 @@ class Tokenizer: """Handle the body of an HTML tag that is parser-blacklisted.""" strip = lambda text: text.rstrip().lower() while True: - this, next = self._read(), self._read(1) + this, nxt = self._read(), self._read(1) if this is self.END: self._fail_route() - elif this == "<" and next == "/": + elif this == "<" and nxt == "/": self._head += 3 if self._read() != ">" or (strip(self._read(-1)) != strip(self._stack[1].text)): @@ -854,7 +855,7 @@ class Tokenizer: 
self._push(contexts.TAG_OPEN) self._emit(tokens.TagOpenOpen()) while True: - this, next = self._read(), self._read(1) + this, nxt = self._read(), self._read(1) can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or data.context & data.CX_NOTE_SPACE) if this is self.END: @@ -876,7 +877,7 @@ class Tokenizer: if is_parsable(self._stack[1].text): return self._parse(push=False) return self._handle_blacklisted_tag() - elif this == "/" and next == ">" and can_exit: + elif this == "/" and nxt == ">" and can_exit: self._handle_tag_close_open(data, tokens.TagCloseSelfclose) return self._pop() else: @@ -933,9 +934,11 @@ class Tokenizer: stack = self._parse(new_ctx) except BadRoute: self._head = reset - return self._emit_text("''") + self._emit_text("''") + return else: - return self._emit_text("''") + self._emit_text("''") + return self._emit_style_tag("i", "''", stack) def _parse_bold(self): @@ -948,7 +951,7 @@ class Tokenizer: if self._context & contexts.STYLE_SECOND_PASS: self._emit_text("'") return True - elif self._context & contexts.STYLE_ITALICS: + if self._context & contexts.STYLE_ITALICS: self._context |= contexts.STYLE_PASS_AGAIN self._emit_text("'''") else: @@ -956,6 +959,7 @@ class Tokenizer: self._parse_italics() else: self._emit_style_tag("b", "'''", stack) + return False def _parse_italics_and_bold(self): """Parse wiki-style italics and bold together (i.e., five ticks).""" @@ -1017,7 +1021,7 @@ class Tokenizer: if ticks == 5: self._head -= 3 if italics else 2 return self._pop() - elif not self._can_recurse(): + if not self._can_recurse(): if ticks == 3: if self._context & contexts.STYLE_SECOND_PASS: self._emit_text("'") @@ -1101,7 +1105,7 @@ class Tokenizer: if this.isspace(): data.padding_buffer["first"] += this return data.padding_buffer["first"] - elif this is self.END or this == end_token: + if this is self.END or this == end_token: if self._context & contexts.TAG_ATTR: if data.context & data.CX_QUOTED: # Unclosed attribute quote: reset, don't 
die @@ -1241,9 +1245,9 @@ class Tokenizer: if context & contexts.FAIL_NEXT: return False if context & contexts.WIKILINK_TITLE: - if this == "]" or this == "{": + if this in ("]", "{"): self._context |= contexts.FAIL_NEXT - elif this == "\n" or this == "[" or this == "}" or this == ">": + elif this in ("\n", "[", "}", ">"): return False elif this == "<": if self._read(1) == "!": @@ -1251,16 +1255,16 @@ class Tokenizer: else: return False return True - elif context & contexts.EXT_LINK_TITLE: + if context & contexts.EXT_LINK_TITLE: return this != "\n" - elif context & contexts.TEMPLATE_NAME: + if context & contexts.TEMPLATE_NAME: if this == "{": self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT return True if this == "}" or (this == "<" and self._read(1) == "!"): self._context |= contexts.FAIL_NEXT return True - if this == "[" or this == "]" or this == "<" or this == ">": + if this in ("[", "]", "<", ">"): return False if this == "|": return True @@ -1273,30 +1277,29 @@ class Tokenizer: elif this is self.END or not this.isspace(): self._context |= contexts.HAS_TEXT return True - elif context & contexts.TAG_CLOSE: + if context & contexts.TAG_CLOSE: return this != "<" - else: - if context & contexts.FAIL_ON_EQUALS: - if this == "=": - return False - elif context & contexts.FAIL_ON_LBRACE: - if this == "{" or (self._read(-1) == self._read(-2) == "{"): - if context & contexts.TEMPLATE: - self._context |= contexts.FAIL_ON_EQUALS - else: - self._context |= contexts.FAIL_NEXT - return True - self._context ^= contexts.FAIL_ON_LBRACE - elif context & contexts.FAIL_ON_RBRACE: - if this == "}": + if context & contexts.FAIL_ON_EQUALS: + if this == "=": + return False + elif context & contexts.FAIL_ON_LBRACE: + if this == "{" or (self._read(-1) == self._read(-2) == "{"): + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: self._context |= contexts.FAIL_NEXT - return True - self._context ^= contexts.FAIL_ON_RBRACE - elif this == "{": - 
self._context |= contexts.FAIL_ON_LBRACE - elif this == "}": - self._context |= contexts.FAIL_ON_RBRACE - return True + return True + self._context ^= contexts.FAIL_ON_LBRACE + elif context & contexts.FAIL_ON_RBRACE: + if this == "}": + self._context |= contexts.FAIL_NEXT + return True + self._context ^= contexts.FAIL_ON_RBRACE + elif this == "{": + self._context |= contexts.FAIL_ON_LBRACE + elif this == "}": + self._context |= contexts.FAIL_ON_RBRACE + return True def _parse(self, context=0, push=True): """Parse the wikicode string, using *context* for when to stop.""" @@ -1315,8 +1318,8 @@ class Tokenizer: continue if this is self.END: return self._handle_end() - next = self._read(1) - if this == next == "{": + nxt = self._read(1) + if this == nxt == "{": if self._can_recurse(): self._parse_template_or_argument() else: @@ -1325,23 +1328,22 @@ class Tokenizer: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() - elif this == next == "}" and self._context & contexts.TEMPLATE: + elif this == nxt == "}" and self._context & contexts.TEMPLATE: return self._handle_template_end() elif this == "|" and self._context & contexts.ARGUMENT_NAME: self._handle_argument_separator() - elif this == next == "}" and self._context & contexts.ARGUMENT: + elif this == nxt == "}" and self._context & contexts.ARGUMENT: if self._read(2) == "}": return self._handle_argument_end() - else: - self._emit_text("}") - elif this == next == "[" and self._can_recurse(): + self._emit_text("}") + elif this == nxt == "[" and self._can_recurse(): if not self._context & contexts.NO_WIKILINKS: self._parse_wikilink() else: self._emit_text("[") elif this == "|" and self._context & contexts.WIKILINK_TITLE: self._handle_wikilink_separator() - elif this == next == "]" and self._context & contexts.WIKILINK: + elif this == nxt == "]" and self._context & contexts.WIKILINK: return self._handle_wikilink_end() elif this == "[": 
self._parse_external_link(True) @@ -1360,12 +1362,12 @@ class Tokenizer: self._fail_route() elif this == "&": self._parse_entity() - elif this == "<" and next == "!": + elif this == "<" and nxt == "!": if self._read(2) == self._read(3) == "-": self._parse_comment() else: self._emit_text(this) - elif this == "<" and next == "/" and self._read(2) is not self.END: + elif this == "<" and nxt == "/" and self._read(2) is not self.END: if self._context & contexts.TAG_BODY: self._handle_tag_open_close() else: @@ -1377,14 +1379,14 @@ class Tokenizer: self._emit_text("<") elif this == ">" and self._context & contexts.TAG_CLOSE: return self._handle_tag_close_close() - elif this == next == "'" and not self._skip_style_tags: + elif this == nxt == "'" and not self._skip_style_tags: result = self._parse_style() if result is not None: return result elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"): self._handle_list() elif self._read(-1) in ("\n", self.START) and ( - this == next == self._read(2) == self._read(3) == "-"): + this == nxt == self._read(2) == self._read(3) == "-"): self._handle_hr() elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() @@ -1392,7 +1394,7 @@ class Tokenizer: # Kill potential table contexts self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS # Start of table parsing - elif this == "{" and next == "|" and ( + elif this == "{" and nxt == "|" and ( self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if self._can_recurse(): @@ -1400,15 +1402,15 @@ class Tokenizer: else: self._emit_text("{") elif self._context & contexts.TABLE_OPEN: - if this == next == "|" and self._context & contexts.TABLE_TD_LINE: + if this == nxt == "|" and self._context & contexts.TABLE_TD_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE) - elif this == next == "|" and 
self._context & contexts.TABLE_TH_LINE: + elif this == nxt == "|" and self._context & contexts.TABLE_TH_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE) - elif this == next == "!" and self._context & contexts.TABLE_TH_LINE: + elif this == nxt == "!" and self._context & contexts.TABLE_TH_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE) @@ -1420,13 +1422,13 @@ class Tokenizer: self._emit_text(this) elif (self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): - if this == "|" and next == "}": + if this == "|" and nxt == "}": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() if self._context & contexts.TABLE_ROW_OPEN: return self._handle_table_row_end() return self._handle_table_end() - elif this == "|" and next == "-": + if this == "|" and nxt == "-": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() if self._context & contexts.TABLE_ROW_OPEN: @@ -1458,10 +1460,10 @@ class Tokenizer: self._skip_style_tags = skip_style_tags try: - tokens = self._parse(context) - except BadRoute: # pragma: no cover (untestable/exceptional case) - raise ParserError("Python tokenizer exited with BadRoute") + result = self._parse(context) + except BadRoute as exc: # pragma: no cover (untestable/exceptional case) + raise ParserError("Python tokenizer exited with BadRoute") from exc if self._stacks: # pragma: no cover (untestable/exceptional case) err = "Python tokenizer exited with non-empty token stack" raise ParserError(err) - return tokens + return result diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index ec99c67..257ed89 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,5 +1,5 @@ # -# Copyright 
(C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ a syntactically valid form by the :class:`.Tokenizer`, and then converted into the :class`.Wikicode` tree by the :class:`.Builder`. """ - __all__ = ["Token"] class Token(dict): diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py index fdf7bd8..723d992 100644 --- a/mwparserfromhell/smart_list/__init__.py +++ b/mwparserfromhell/smart_list/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -22,8 +22,9 @@ """ This module contains the :class:`.SmartList` type, as well as its -:class:`._ListProxy` child, which together implement a list whose sublists +:class:`.ListProxy` child, which together implement a list whose sublists reflect changes made to the main list, and vice-versa. """ -from .SmartList import SmartList +from .list_proxy import ListProxy as _ListProxy +from .smart_list import SmartList diff --git a/mwparserfromhell/smart_list/ListProxy.py b/mwparserfromhell/smart_list/list_proxy.py similarity index 89% rename from mwparserfromhell/smart_list/ListProxy.py rename to mwparserfromhell/smart_list/list_proxy.py index 35b45dc..f1525fc 100644 --- a/mwparserfromhell/smart_list/ListProxy.py +++ b/mwparserfromhell/smart_list/list_proxy.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -20,12 +20,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-# SmartList has to be a full import in order to avoid cyclical import errors -import mwparserfromhell.smart_list.SmartList from .utils import _SliceNormalizerMixIn, inheritdoc -class _ListProxy(_SliceNormalizerMixIn, list): +class ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :class:`.SmartList` object when slicing. It does not @@ -42,32 +40,32 @@ class _ListProxy(_SliceNormalizerMixIn, list): return repr(self._render()) def __lt__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() < list(other) return self._render() < other def __le__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() <= list(other) return self._render() <= other def __eq__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() == list(other) return self._render() == other def __ne__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() != list(other) return self._render() != other def __gt__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() > list(other) return self._render() > other def __ge__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() >= list(other) return self._render() >= other @@ -84,8 +82,7 @@ class _ListProxy(_SliceNormalizerMixIn, list): keystop = min(self._start + key.stop, self._stop) adjusted = slice(keystart, keystop, key.step) return self._parent[adjusted] - else: - return self._render()[key] + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): @@ -133,20 +130,20 @@ class _ListProxy(_SliceNormalizerMixIn, list): return item in self._render() def __add__(self, other): - return mwparserfromhell.smart_list.SmartList(list(self) + 
other) + return type(self._parent)(list(self) + other) def __radd__(self, other): - return mwparserfromhell.smart_list.SmartList(other + list(self)) + return type(self._parent)(other + list(self)) def __iadd__(self, other): self.extend(other) return self def __mul__(self, other): - return mwparserfromhell.smart_list.SmartList(list(self) * other) + return type(self._parent)(list(self) * other) def __rmul__(self, other): - return mwparserfromhell.smart_list.SmartList(other * list(self)) + return type(self._parent)(other * list(self)) def __imul__(self, other): self.extend(list(self) * (other - 1)) diff --git a/mwparserfromhell/smart_list/SmartList.py b/mwparserfromhell/smart_list/smart_list.py similarity index 91% rename from mwparserfromhell/smart_list/SmartList.py rename to mwparserfromhell/smart_list/smart_list.py index c2e83a4..f83e181 100644 --- a/mwparserfromhell/smart_list/SmartList.py +++ b/mwparserfromhell/smart_list/smart_list.py @@ -1,4 +1,5 @@ -# Copyright (C) 2012-2016 Ben Kurtovic +# +# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -19,9 +20,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from _weakref import ref +from weakref import ref -from .ListProxy import _ListProxy +from .list_proxy import ListProxy from .utils import _SliceNormalizerMixIn, inheritdoc @@ -32,7 +33,7 @@ class SmartList(_SliceNormalizerMixIn, list): list (such as the addition, removal, or replacement of elements) will be reflected in the sublist, or vice-versa, to the greatest degree possible. This is implemented by having sublists - instances of the - :class:`._ListProxy` type - dynamically determine their elements by storing + :class:`.ListProxy` type - dynamically determine their elements by storing their slice info and retrieving that slice from the parent. 
Methods that change the size of the list also change the slice info. For example:: @@ -61,21 +62,22 @@ class SmartList(_SliceNormalizerMixIn, list): return super().__getitem__(key) key = self._normalize_slice(key, clamp=False) sliceinfo = [key.start, key.stop, key.step] - child = _ListProxy(self, sliceinfo) + child = ListProxy(self, sliceinfo) child_ref = ref(child, self._delete_child) self._children[id(child_ref)] = (child_ref, sliceinfo) return child def __setitem__(self, key, item): if not isinstance(key, slice): - return super().__setitem__(key, item) + super().__setitem__(key, item) + return item = list(item) super().__setitem__(key, item) key = self._normalize_slice(key, clamp=True) diff = len(item) + (key.start - key.stop) // key.step if not diff: return - for child, (start, stop, step) in self._children.values(): + for child, (start, stop, _step) in self._children.values(): if start > key.stop: self._children[id(child)][1][0] += diff if stop is not None and stop >= key.stop: @@ -88,7 +90,7 @@ class SmartList(_SliceNormalizerMixIn, list): else: key = slice(key, key + 1, 1) diff = (key.stop - key.start) // key.step - for child, (start, stop, step) in self._children.values(): + for child, (start, stop, _step) in self._children.values(): if start > key.start: self._children[id(child)][1][0] -= diff if stop is not None and stop >= key.stop: diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 564706d..f39cce7 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -38,67 +38,64 @@ def inheritdoc(method): return method class StringMixIn: - """Implement the interface for ``unicode``/``str`` in a dynamic manner. 
+ """Implement the interface for ``str`` in a dynamic manner. - To use this class, inherit from it and override the :meth:`__unicode__` - method to return the string representation of the object. - The various string methods will operate on the value of :meth:`__unicode__` - instead of the immutable ``self`` like the regular ``str`` type. + To use this class, inherit from it and override the :meth:`__str__` method + to return the string representation of the object. The various string + methods will operate on the value of :meth:`__str__` instead of the + immutable ``self`` like the regular ``str`` type. """ def __str__(self): - return self.__unicode__() + raise NotImplementedError() def __bytes__(self): - return bytes(self.__unicode__(), getdefaultencoding()) - - def __unicode__(self): - raise NotImplementedError() + return bytes(self.__str__(), getdefaultencoding()) def __repr__(self): - return repr(self.__unicode__()) + return repr(self.__str__()) def __lt__(self, other): - return self.__unicode__() < other + return self.__str__() < other def __le__(self, other): - return self.__unicode__() <= other + return self.__str__() <= other def __eq__(self, other): - return self.__unicode__() == other + return self.__str__() == other def __ne__(self, other): - return self.__unicode__() != other + return self.__str__() != other def __gt__(self, other): - return self.__unicode__() > other + return self.__str__() > other def __ge__(self, other): - return self.__unicode__() >= other + return self.__str__() >= other def __bool__(self): - return bool(self.__unicode__()) + return bool(self.__str__()) def __len__(self): - return len(self.__unicode__()) + return len(self.__str__()) def __iter__(self): - yield from self.__unicode__() + yield from self.__str__() def __getitem__(self, key): - return self.__unicode__()[key] + return self.__str__()[key] def __reversed__(self): - return reversed(self.__unicode__()) + return reversed(self.__str__()) def __contains__(self, item): - 
return str(item) in self.__unicode__() + return str(item) in self.__str__() def __getattr__(self, attr): if not hasattr(str, attr): raise AttributeError("{!r} object has no attribute {!r}".format( type(self).__name__, attr)) - return getattr(self.__unicode__(), attr) + return getattr(self.__str__(), attr) maketrans = str.maketrans # Static method can't rely on __getattr__ diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 9e5e14b..8fa3a96 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,48 +24,47 @@ This module contains accessory functions for other parts of the library. Parser users generally won't need stuff from here. """ - -from .nodes import Node -from .smart_list import SmartList - __all__ = ["parse_anything"] def parse_anything(value, context=0, skip_style_tags=False): """Return a :class:`.Wikicode` for *value*, allowing multiple types. This differs from :meth:`.Parser.parse` in that we accept more than just a - string to be parsed. Unicode objects (strings in py3k), strings (bytes in - py3k), integers (converted to strings), ``None``, existing :class:`.Node` - or :class:`.Wikicode` objects, as well as an iterable of these types, are - supported. This is used to parse input on-the-fly by various methods of - :class:`.Wikicode` and others like :class:`.Template`, such as - :meth:`wikicode.insert() <.Wikicode.insert>` or setting - :meth:`template.name <.Template.name>`. + string to be parsed. Strings, bytes, integers (converted to strings), + ``None``, existing :class:`.Node` or :class:`.Wikicode` objects, as well + as an iterable of these types, are supported. 
This is used to parse input + on-the-fly by various methods of :class:`.Wikicode` and others like + :class:`.Template`, such as :meth:`wikicode.insert() <.Wikicode.insert>` + or setting :meth:`template.name <.Template.name>`. Additional arguments are passed directly to :meth:`.Parser.parse`. """ + # pylint: disable=cyclic-import,import-outside-toplevel + from .nodes import Node from .parser import Parser + from .smart_list import SmartList from .wikicode import Wikicode if isinstance(value, Wikicode): return value - elif isinstance(value, Node): + if isinstance(value, Node): return Wikicode(SmartList([value])) - elif isinstance(value, str): + if isinstance(value, str): return Parser().parse(value, context, skip_style_tags) - elif isinstance(value, bytes): + if isinstance(value, bytes): return Parser().parse(value.decode("utf8"), context, skip_style_tags) - elif isinstance(value, int): + if isinstance(value, int): return Parser().parse(str(value), context, skip_style_tags) - elif value is None: + if value is None: return Wikicode(SmartList()) - elif hasattr(value, "read"): + if hasattr(value, "read"): return parse_anything(value.read(), context, skip_style_tags) try: nodelist = SmartList() for item in value: nodelist += parse_anything(item, context, skip_style_tags).nodes return Wikicode(nodelist) - except TypeError: - error = "Needs string, Node, Wikicode, file, int, None, or iterable of these, but got {0}: {1}" - raise ValueError(error.format(type(value).__name__, value)) + except TypeError as exc: + raise ValueError(f"Needs string, Node, Wikicode, file, int, None, or " + f"iterable of these, but got {type(value).__name__}: " + f"{value}") from exc diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f72c26b..381d938 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, 
to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ from itertools import chain from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) -from .smart_list.ListProxy import _ListProxy +from .smart_list.list_proxy import ListProxy from .string_mixin import StringMixIn from .utils import parse_anything @@ -48,7 +48,7 @@ class Wikicode(StringMixIn): super().__init__() self._nodes = nodes - def __unicode__(self): + def __str__(self): return "".join([str(node) for node in self.nodes]) @staticmethod @@ -108,7 +108,7 @@ class Wikicode(StringMixIn): def _is_child_wikicode(self, obj, recursive=True): """Return whether the given :class:`.Wikicode` is a descendant.""" def deref(nodes): - if isinstance(nodes, _ListProxy): + if isinstance(nodes, ListProxy): return nodes._parent # pylint: disable=protected-access return nodes @@ -249,12 +249,12 @@ class Wikicode(StringMixIn): make_filter = lambda ftype: (lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)) for name, ftype in meths.items(): - ifilter = make_ifilter(ftype) - filter = make_filter(ftype) - ifilter.__doc__ = doc.format(name, "ifilter", ftype) - filter.__doc__ = doc.format(name, "filter", ftype) - setattr(cls, "ifilter_" + name, ifilter) - setattr(cls, "filter_" + name, filter) + ifilt = make_ifilter(ftype) + filt = make_filter(ftype) + ifilt.__doc__ = doc.format(name, "ifilter", ftype) + filt.__doc__ = doc.format(name, "filter", ftype) + setattr(cls, "ifilter_" + name, ifilt) + setattr(cls, "filter_" + name, filt) @property def nodes(self): @@ -351,6 +351,7 @@ class Wikicode(StringMixIn): ancestors = _get_ancestors(code, needle) if ancestors is not None: return [node] + ancestors + return None if isinstance(obj, Wikicode): obj = obj.get(0) @@ -443,13 +444,13 @@ class Wikicode(StringMixIn): """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, 
recursive) - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) context.insert(index.start, value) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) context.insert(index.start, value) else: @@ -478,12 +479,12 @@ class Wikicode(StringMixIn): """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) else: self._slice_replace(context, index, str(obj), "") @@ -645,8 +646,7 @@ class Wikicode(StringMixIn): while "\n\n\n" in stripped: stripped = stripped.replace("\n\n\n", "\n\n") return stripped - else: - return "".join(nodes) + return "".join(nodes) def get_tree(self): """Return a hierarchical tree representation of the object. diff --git a/scripts/memtest.py b/scripts/memtest.py index f60e260..3da1fcc 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,10 @@ # SOFTWARE. """ -Tests for memory leaks in the CTokenizer. Python 2 and 3 compatible. +Tests for memory leaks in the CTokenizer. This appears to work mostly fine under Linux, but gives an absurd number of -false positives on OS X. I'm not sure why. Running the tests multiple times +false positives on macOS. I'm not sure why. 
Running the tests multiple times yields different results (tests don't always leak, and the amount they leak by varies). Increasing the number of loops results in a smaller bytes/loop value, too, indicating the increase in memory usage might be due to something else. @@ -32,7 +32,6 @@ Actual memory leaks typically leak very large amounts of memory (megabytes) and scale with the number of loops. """ -from __future__ import unicode_literals, print_function from locale import LC_ALL, setlocale from multiprocessing import Process, Pipe from os import listdir, path @@ -42,19 +41,16 @@ import psutil from mwparserfromhell.parser._tokenizer import CTokenizer -if sys.version_info[0] == 2: - range = xrange - LOOPS = 10000 -class Color(object): +class Color: GRAY = "\x1b[30;1m" GREEN = "\x1b[92m" YELLOW = "\x1b[93m" RESET = "\x1b[0m" -class MemoryTest(object): +class MemoryTest: """Manages a memory test.""" def __init__(self): @@ -151,13 +147,13 @@ class MemoryTest(object): def _runner(text, child): r1, r2 = range(250), range(LOOPS) - for i in r1: + for _ in r1: CTokenizer().tokenize(text) child.send("OK") child.recv() child.send("OK") child.recv() - for i in r2: + for _ in r2: CTokenizer().tokenize(text) child.send("OK") child.recv() diff --git a/setup.py b/setup.py index d404ead..6fee34a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #! 
/usr/bin/env python # -# Copyright (C) 2012-2018 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index f61cb10..2629671 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,6 @@ from mwparserfromhell.parser.builder import Builder class _TestParseError(Exception): """Raised internally when a test could not be parsed.""" - pass class TokenizerTestCase: @@ -41,7 +40,7 @@ class TokenizerTestCase: """ @staticmethod - def _build_test_method(funcname, data): + def _build_test_method(data): """Create and return a method to be treated as a test case method. 
*data* is a dict containing multiple keys: the *input* text to be @@ -79,7 +78,7 @@ class TokenizerTestCase: try: data["output"] = eval(raw, vars(tokens)) except Exception as err: - raise _TestParseError(err) + raise _TestParseError(err) from err @classmethod def _load_tests(cls, filename, name, text, restrict=None): @@ -115,7 +114,7 @@ class TokenizerTestCase: continue fname = "test_{}{}_{}".format(name, number, data["name"]) - meth = cls._build_test_method(fname, data) + meth = cls._build_test_method(data) setattr(cls, fname, meth) @classmethod diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index cdfbd3a..407711e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,6 @@ from unittest import TestCase from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) -from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode diff --git a/tests/test_argument.py b/tests/test_argument.py index eaf8abe..110436a 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" - def test_unicode(self): - """test Argument.__unicode__()""" + def test_str(self): + """test Argument.__str__()""" node 
= Argument(wraptext("foobar")) self.assertEqual("{{{foobar}}}", str(node)) node2 = Argument(wraptext("foo"), wraptext("bar")) diff --git a/tests/test_attribute.py b/tests/test_attribute.py index b0d0e85..a8be214 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,8 +29,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestAttribute(TreeEqualityTestCase): """Test cases for the Attribute node extra.""" - def test_unicode(self): - """test Attribute.__unicode__()""" + def test_str(self): + """test Attribute.__str__()""" node = Attribute(wraptext("foo")) self.assertEqual(" foo", str(node)) node2 = Attribute(wraptext("foo"), wraptext("bar")) diff --git a/tests/test_comment.py b/tests/test_comment.py index 1024e60..60cbba6 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase class TestComment(TreeEqualityTestCase): """Test cases for the Comment node.""" - def test_unicode(self): - """test Comment.__unicode__()""" + def test_str(self): + """test Comment.__str__()""" node = Comment("foobar") self.assertEqual("", str(node)) diff --git a/tests/test_docs.py b/tests/test_docs.py index 2e78106..101a347 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of 
this software and associated documentation files (the "Software"), to deal @@ -31,10 +31,10 @@ import mwparserfromhell class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" - def assertPrint(self, input, output): - """Assertion check that *input*, when printed, produces *output*.""" + def assertPrint(self, value, output): + """Assertion check that *value*, when printed, produces *output*.""" buff = StringIO() - print(input, end="", file=buff) + print(value, end="", file=buff) buff.seek(0) self.assertEqual(output, buff.read()) diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 48a7b82..1323109 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestExternalLink(TreeEqualityTestCase): """Test cases for the ExternalLink node.""" - def test_unicode(self): - """test ExternalLink.__unicode__()""" + def test_str(self): + """test ExternalLink.__str__()""" node = ExternalLink(wraptext("http://example.com/"), brackets=False) self.assertEqual("http://example.com/", str(node)) node2 = ExternalLink(wraptext("http://example.com/")) diff --git a/tests/test_heading.py b/tests/test_heading.py index 46c6258..a031332 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class 
TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" - def test_unicode(self): - """test Heading.__unicode__()""" + def test_str(self): + """test Heading.__str__()""" node = Heading(wraptext("foobar"), 2) self.assertEqual("==foobar==", str(node)) node2 = Heading(wraptext(" zzz "), 5) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 273ee21..d3a9bd2 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,13 +23,13 @@ import unittest from mwparserfromhell.nodes import HTMLEntity -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase class TestHTMLEntity(TreeEqualityTestCase): """Test cases for the HTMLEntity node.""" - def test_unicode(self): - """test HTMLEntity.__unicode__()""" + def test_str(self): + """test HTMLEntity.__str__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("6b", named=False, hexadecimal=True) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index d53c7af..3d0028e 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,16 +21,15 @@ import unittest -from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wraptext class 
TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" - def test_unicode(self): - """test Parameter.__unicode__()""" + def test_str(self): + """test Parameter.__str__()""" node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) self.assertEqual("foo", str(node)) node2 = Parameter(wraptext("foo"), wraptext("bar")) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 16d99e7..58b327a 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,11 +22,11 @@ import unittest from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.smart_list.ListProxy import _ListProxy +from mwparserfromhell.smart_list.list_proxy import ListProxy class TestSmartList(unittest.TestCase): - """Test cases for the SmartList class and its child, _ListProxy.""" + """Test cases for the SmartList class and its child, ListProxy.""" def _test_get_set_del_item(self, builder): """Run tests on __get/set/delitem__ of a list built with *builder*.""" @@ -178,7 +178,7 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] - for i in range(len(list1)): + for _ in range(len(list1)): out.append(next(gen1)) self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) @@ -260,7 +260,8 @@ class TestSmartList(unittest.TestCase): list3.sort(key=lambda i: i[1], reverse=True) self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) - def _dispatch_test_for_children(self, meth): + @staticmethod + def _dispatch_test_for_children(meth): """Run a test method on various different types of children.""" meth(lambda L: SmartList(list(L))[:]) meth(lambda L: SmartList([999] + list(L))[1:]) @@ -268,13 +269,13 @@ class 
TestSmartList(unittest.TestCase): meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]) def test_docs(self): - """make sure the methods of SmartList/_ListProxy have docstrings""" + """make sure the methods of SmartList/ListProxy have docstrings""" methods = ["append", "count", "extend", "index", "insert", "pop", "remove", "reverse", "sort"] for meth in methods: expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ - listproxy_doc = getattr(_ListProxy, meth).__doc__ + listproxy_doc = getattr(ListProxy, meth).__doc__ self.assertEqual(expected, smartlist_doc) self.assertEqual(expected, listproxy_doc) @@ -305,19 +306,19 @@ class TestSmartList(unittest.TestCase): self._test_list_methods(SmartList) def test_child_get_set_del(self): - """make sure _ListProxy's getitem/setitem/delitem work""" + """make sure ListProxy's getitem/setitem/delitem work""" self._dispatch_test_for_children(self._test_get_set_del_item) def test_child_add(self): - """make sure _ListProxy's add/radd/iadd work""" + """make sure ListProxy's add/radd/iadd work""" self._dispatch_test_for_children(self._test_add_radd_iadd) def test_child_other_magics(self): - """make sure _ListProxy's other magically implemented features work""" + """make sure ListProxy's other magically implemented features work""" self._dispatch_test_for_children(self._test_other_magic_methods) def test_child_methods(self): - """make sure _ListProxy's non-magic methods work, like append()""" + """make sure ListProxy's non-magic methods work, like append()""" self._dispatch_test_for_children(self._test_list_methods) def test_influence(self): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 673d5fa..aa13f11 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this 
software and associated documentation files (the "Software"), to deal @@ -29,7 +29,7 @@ class _FakeString(StringMixIn): def __init__(self, data): self._data = data - def __unicode__(self): + def __str__(self): return self._data @@ -128,7 +128,7 @@ class TestStringMixIn(unittest.TestCase): self.assertIsInstance(gen2, GeneratorType) out = [] - for i in range(len(str1)): + for _ in range(len(str1)): out.append(next(gen1)) self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) diff --git a/tests/test_tag.py b/tests/test_tag.py index 860a94b..1fb82e9 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,8 +34,8 @@ agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) class TestTag(TreeEqualityTestCase): """Test cases for the Tag node.""" - def test_unicode(self): - """test Tag.__unicode__()""" + def test_str(self): + """test Tag.__str__()""" node1 = Tag(wraptext("ref")) node2 = Tag(wraptext("span"), wraptext("foo"), [agen("style", "color: red;")]) @@ -227,7 +227,7 @@ class TestTag(TreeEqualityTestCase): node.wiki_markup = "{" self.assertEqual("{|\n{", node) node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|") - self.assertEqual("|", node.wiki_style_separator) + self.assertEqual("|", node2.wiki_style_separator) def test_closing_wiki_markup(self): """test getter/setter for closing_wiki_markup attribute""" diff --git a/tests/test_template.py b/tests/test_template.py index 461371d..34dd32d 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2017 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software 
and associated documentation files (the "Software"), to deal @@ -34,8 +34,8 @@ pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) class TestTemplate(TreeEqualityTestCase): """Test cases for the Template node.""" - def test_unicode(self): - """test Template.__unicode__()""" + def test_str(self): + """test Template.__str__()""" node = Template(wraptext("foobar")) self.assertEqual("{{foobar}}", str(node)) node2 = Template(wraptext("foo"), diff --git a/tests/test_text.py b/tests/test_text.py index 94da937..a54311a 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,8 +26,8 @@ from mwparserfromhell.nodes import Text class TestText(unittest.TestCase): """Test cases for the Text node.""" - def test_unicode(self): - """test Text.__unicode__()""" + def test_str(self): + """test Text.__str__()""" node = Text("foobar") self.assertEqual("foobar", str(node)) node2 = Text("fóóbar") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 6ce28b5..1f6c02a 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -61,12 +61,9 @@ class TestTokens(unittest.TestCase): hundredchars = ("earwig" * 100)[:97] + "..." 
self.assertEqual("Token()", repr(token1)) - token2repr1 = "Token(foo='bar', baz=123)" - token2repr2 = "Token(baz=123, foo='bar')" - token3repr = "Text(text='" + hundredchars + "')" - token2repr = repr(token2) - self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) - self.assertEqual(token3repr, repr(token3)) + self.assertTrue(repr(token2) in ( + "Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')")) + self.assertEqual("Text(text='" + hundredchars + "')", repr(token3)) def test_equality(self): """check that equivalent tokens are considered equal""" diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 9701865..0188ad0 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,8 +24,7 @@ import re from types import GeneratorType import unittest -from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, - Node, Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes import Argument, Heading, Template, Text from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse @@ -35,8 +34,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestWikicode(TreeEqualityTestCase): """Tests for the Wikicode class, which manages a list of nodes.""" - def test_unicode(self): - """test Wikicode.__unicode__()""" + def test_str(self): + """test Wikicode.__str__()""" code1 = parse("foobar") code2 = parse("Have a {{template}} and a [[page|link]]") self.assertEqual("foobar", str(code1)) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 1865b6e..597c18f 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,5 +1,5 @@ # -# Copyright (C) 
2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" - def test_unicode(self): - """test Wikilink.__unicode__()""" + def test_str(self): + """test Wikilink.__str__()""" node = Wikilink(wraptext("foobar")) self.assertEqual("[[foobar]]", str(node)) node2 = Wikilink(wraptext("foo"), wraptext("bar")) From d36aa34997db1cbe04901f0883793b218650e63b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 03:33:28 -0500 Subject: [PATCH 26/31] Don't use f-strings; we still support Python 3.5 --- mwparserfromhell/nodes/html_entity.py | 13 +++++++------ mwparserfromhell/utils.py | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 7371f2e..b08d2ee 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -101,19 +101,20 @@ class HTMLEntity(Node): intval = int(newval, 16) except ValueError: if newval not in htmlentities.entitydefs: - raise ValueError(f"entity value {newval!r} is not a valid name") from None + raise ValueError( + "entity value {!r} is not a valid name".format(newval)) from None self._named = True self._hexadecimal = False else: if intval < 0 or intval > 0x10FFFF: raise ValueError( - f"entity value 0x{intval:x} is not in range(0x110000)") from None + "entity value 0x{:x} is not in range(0x110000)".format(intval)) from None self._named = False self._hexadecimal = True else: test = int(newval, 16 if self.hexadecimal else 10) if test < 0 or test > 0x10FFFF: - raise ValueError(f"entity value {test} is not in range(0x110000)") + raise ValueError("entity value {} is not in 
range(0x110000)".format(test)) self._named = False self._value = newval @@ -121,13 +122,13 @@ class HTMLEntity(Node): def named(self, newval): newval = bool(newval) if newval and self.value not in htmlentities.entitydefs: - raise ValueError(f"entity value {self.value!r} is not a valid name") + raise ValueError("entity value {!r} is not a valid name".format(self.value)) if not newval: try: int(self.value, 16) except ValueError as exc: - raise ValueError(f"current entity value {self.value!r} " - f"is not a valid Unicode codepoint") from exc + raise ValueError("current entity value {!r} is not a valid " + "Unicode codepoint".format(self.value)) from exc self._named = newval @hexadecimal.setter diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 8fa3a96..2c6df08 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -65,6 +65,6 @@ def parse_anything(value, context=0, skip_style_tags=False): nodelist += parse_anything(item, context, skip_style_tags).nodes return Wikicode(nodelist) except TypeError as exc: - raise ValueError(f"Needs string, Node, Wikicode, file, int, None, or " - f"iterable of these, but got {type(value).__name__}: " - f"{value}") from exc + error = ("Needs string, Node, Wikicode, file, int, None, or " + "iterable of these, but got {0}: {1}") + raise ValueError(error.format(type(value).__name__, value)) from exc From b7b3b2e33e6aacf4f809c8cc7d6aa5dcf48b2947 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 04:04:59 -0500 Subject: [PATCH 27/31] Update changelog; minor tweak to file headers --- .travis.yml | 2 +- CHANGELOG | 11 +++++++++-- README.rst | 3 +-- appveyor.yml | 8 ++++++++ docs/changelog.rst | 17 +++++++++++++---- mwparserfromhell/__init__.py | 1 - mwparserfromhell/definitions.py | 1 - mwparserfromhell/nodes/__init__.py | 1 - mwparserfromhell/nodes/_base.py | 1 - mwparserfromhell/nodes/argument.py | 1 - mwparserfromhell/nodes/comment.py | 1 - mwparserfromhell/nodes/external_link.py | 1 - 
mwparserfromhell/nodes/extras/__init__.py | 1 - mwparserfromhell/nodes/extras/attribute.py | 1 - mwparserfromhell/nodes/extras/parameter.py | 1 - mwparserfromhell/nodes/heading.py | 1 - mwparserfromhell/nodes/html_entity.py | 1 - mwparserfromhell/nodes/tag.py | 1 - mwparserfromhell/nodes/template.py | 1 - mwparserfromhell/nodes/text.py | 1 - mwparserfromhell/nodes/wikilink.py | 1 - mwparserfromhell/parser/__init__.py | 1 - mwparserfromhell/parser/builder.py | 1 - mwparserfromhell/parser/contexts.py | 1 - mwparserfromhell/parser/errors.py | 1 - mwparserfromhell/parser/tokenizer.py | 1 - mwparserfromhell/parser/tokens.py | 1 - mwparserfromhell/smart_list/__init__.py | 1 - mwparserfromhell/smart_list/list_proxy.py | 1 - mwparserfromhell/smart_list/smart_list.py | 1 - mwparserfromhell/string_mixin.py | 1 - mwparserfromhell/utils.py | 1 - mwparserfromhell/wikicode.py | 1 - scripts/memtest.py | 2 -- setup.py | 3 ++- tests/_test_tokenizer.py | 1 - tests/_test_tree_equality.py | 1 - tests/test_argument.py | 1 - tests/test_attribute.py | 1 - tests/test_builder.py | 1 - tests/test_comment.py | 1 - tests/test_ctokenizer.py | 1 - tests/test_docs.py | 1 - tests/test_external_link.py | 1 - tests/test_heading.py | 1 - tests/test_html_entity.py | 1 - tests/test_parameter.py | 1 - tests/test_parser.py | 1 - tests/test_pytokenizer.py | 1 - tests/test_roundtripping.py | 1 - tests/test_smart_list.py | 1 - tests/test_string_mixin.py | 1 - tests/test_tag.py | 1 - tests/test_template.py | 1 - tests/test_text.py | 1 - tests/test_tokens.py | 1 - tests/test_utils.py | 1 - tests/test_wikicode.py | 1 - tests/test_wikilink.py | 1 - 59 files changed, 34 insertions(+), 64 deletions(-) diff --git a/.travis.yml b/.travis.yml index f352d3d..7668ab8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ python: - 3.6 - 3.7 - 3.8 - - 3.9-dev + - 3.9 arch: - amd64 - ppc64le diff --git a/CHANGELOG b/CHANGELOG index 3f9ca3a..6d0cf86 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,10 +1,17 @@ v0.6 
(unreleased): -- Added support for Python 3.8. -- Dropped support for end-of-life Python 3.4. +Thanks to everyone for their patience with this release! + +- Breaking change: dropped support for end-of-life Python 2.7 and 3.4. +- Added support for Python 3.8 and 3.9. +- Added binary wheels for Linux and macOS. - Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) +- Add a 'default' parameter to Template.get, and implement dict-style item + access for template parameters. (#252) - Fixed a rare parsing bug involving deeply nested style tags. (#224) +- Updated HTML tag definitions. +- Internal refactoring and cleanup. v0.5.4 (released May 15, 2019): diff --git a/README.rst b/README.rst index bbac7e6..45db387 100644 --- a/README.rst +++ b/README.rst @@ -41,8 +41,7 @@ Normal usage is rather straightforward (where ``text`` is page text): >>> wikicode = mwparserfromhell.parse(text) ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an -ordinary ``str`` object with some extra methods. -For example: +ordinary ``str`` object with some extra methods. For example: >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" >>> wikicode = mwparserfromhell.parse(text) diff --git a/appveyor.yml b/appveyor.yml index ccbaf59..a39024f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -54,6 +54,14 @@ environment: PYTHON_VERSION: "3.8" PYTHON_ARCH: "64" + - PYTHON: "C:\\Python39" + PYTHON_VERSION: "3.9" + PYTHON_ARCH: "32" + + - PYTHON: "C:\\Python39-x64" + PYTHON_VERSION: "3.9" + PYTHON_ARCH: "64" + install: - "%PIP% install --disable-pip-version-check --user --upgrade pip" - "%PIP% install wheel twine" diff --git a/docs/changelog.rst b/docs/changelog.rst index cf3ec8d..59bedc8 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,12 +7,21 @@ v0.6 Unreleased (`changes `__): -- Added support for Python 3.8. -- Dropped support for end-of-life Python 3.4. 
-- Updated Wikicode.matches() to recognize underscores as being equivalent - to spaces. (`#216 `_) +Thanks to everyone for their patience with this release! + +- Breaking change: dropped support for end-of-life Python 2.7 and 3.4. +- Added support for Python 3.8 and 3.9. +- Added binary wheels for Linux and macOS. +- Updated :meth:`.Wikicode.matches` to recognize underscores as being + equivalent to spaces. + (`#216 `_) +- Add a `default` parameter to :meth:`.Template.get`, and implement dict-style + item access for template parameters. + (`#252 `_) - Fixed a rare parsing bug involving deeply nested style tags. (`#224 `_) +- Updated HTML tag definitions. +- Internal refactoring and cleanup. v0.5.4 ------ diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 609999b..841f992 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index bd0e969..c8d37cd 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 4c29a5b..18a1780 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/_base.py b/mwparserfromhell/nodes/_base.py index e4a3c2e..e6b2a50 100644 --- a/mwparserfromhell/nodes/_base.py +++ b/mwparserfromhell/nodes/_base.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person 
obtaining a copy diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index a852a65..501788f 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 56b05b7..fd8a9cc 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index ba86659..0423e2a 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 43fe862..ef76125 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 442c3ac..9e7b7cd 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 9287e00..44fb3aa 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ 
b/mwparserfromhell/nodes/extras/parameter.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index de4dc70..77f2f68 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index b08d2ee..fa3fa4d 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 094853b..eb59c5b 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 6f23556..493e2b4 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index cce670c..3242170 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index fc78833..7304168 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ 
b/mwparserfromhell/nodes/wikilink.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index cde45c5..3fad93b 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 4c14b2a..2f58455 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index b6d013e..aecc2f3 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/parser/errors.py b/mwparserfromhell/parser/errors.py index adf3d5d..f94e62b 100644 --- a/mwparserfromhell/parser/errors.py +++ b/mwparserfromhell/parser/errors.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 93b79d9..d4e6c8c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 257ed89..f274123 100644 --- 
a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py index 723d992..92c75e2 100644 --- a/mwparserfromhell/smart_list/__init__.py +++ b/mwparserfromhell/smart_list/__init__.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # diff --git a/mwparserfromhell/smart_list/list_proxy.py b/mwparserfromhell/smart_list/list_proxy.py index f1525fc..d2d89e9 100644 --- a/mwparserfromhell/smart_list/list_proxy.py +++ b/mwparserfromhell/smart_list/list_proxy.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # diff --git a/mwparserfromhell/smart_list/smart_list.py b/mwparserfromhell/smart_list/smart_list.py index f83e181..e2fd87f 100644 --- a/mwparserfromhell/smart_list/smart_list.py +++ b/mwparserfromhell/smart_list/smart_list.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index f39cce7..2aeabf5 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 2c6df08..5d262b9 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 381d938..bbd38a2 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,4 +1,3 
@@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/scripts/memtest.py b/scripts/memtest.py index 3da1fcc..6f0d1ab 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/setup.py b/setup.py index 6fee34a..3c7d2cb 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ from setuptools.command.build_ext import build_ext from mwparserfromhell import __version__ -with open("README.rst", encoding='utf-8') as fp: +with open("README.rst") as fp: long_docs = fp.read() use_extension = True @@ -97,6 +97,7 @@ setup( "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Text Processing :: Markup" ], ) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 2629671..6a749c7 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 407711e..3dc28c0 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_argument.py b/tests/test_argument.py index 110436a..ccf429d 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_attribute.py b/tests/test_attribute.py index a8be214..74739b7 100644 --- 
a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_builder.py b/tests/test_builder.py index e5f43aa..59f3445 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_comment.py b/tests/test_comment.py index 60cbba6..071d1d8 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index f9b8d2f..53d6700 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_docs.py b/tests/test_docs.py index 101a347..e478a5f 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 1323109..462a438 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_heading.py b/tests/test_heading.py index a031332..d043bc7 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 
d3a9bd2..85a23ec 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 3d0028e..fe527f1 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_parser.py b/tests/test_parser.py index 22a76f6..b16449f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 9fd0c3e..3015b14 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2019 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_roundtripping.py b/tests/test_roundtripping.py index 9ecd5bd..b5e204c 100644 --- a/tests/test_roundtripping.py +++ b/tests/test_roundtripping.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 58b327a..0cdb79a 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index aa13f11..e6d1ff4 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining 
a copy diff --git a/tests/test_tag.py b/tests/test_tag.py index 1fb82e9..2bea194 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_template.py b/tests/test_template.py index 34dd32d..cf812a6 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_text.py b/tests/test_text.py index a54311a..96dff16 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1f6c02a..42f54bf 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_utils.py b/tests/test_utils.py index b8572fd..89ba93c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2016 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 0188ad0..6e017de 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 597c18f..1f331e0 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,4 +1,3 @@ -# # Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy From 
90061b6844407a7671501d3060d9617a18d6e59b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Mon, 21 Dec 2020 10:13:26 +0100 Subject: [PATCH 28/31] Fix parsing of section headings inside templates (#233) Fixes #198 Co-authored-by: Ben Kurtovic --- mwparserfromhell/parser/ctokenizer/tok_parse.c | 8 ++++++-- mwparserfromhell/parser/tokenizer.py | 7 +++++-- tests/tokenizer/templates.mwtest | 28 ++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/parser/ctokenizer/tok_parse.c b/mwparserfromhell/parser/ctokenizer/tok_parse.c index be7018b..e73b3ef 100644 --- a/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -2628,7 +2628,11 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) return NULL; } else if (this == '=' && this_context & LC_TEMPLATE_PARAM_KEY) { - if (Tokenizer_handle_template_param_value(self)) + if (!(self->global & GL_HEADING) && (!last || last == '\n') && next == '=') { + if (Tokenizer_parse_heading(self)) + return NULL; + } + else if (Tokenizer_handle_template_param_value(self)) return NULL; } else if (this == next && next == '}' && this_context & LC_TEMPLATE) @@ -2668,7 +2672,7 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) } else if (this == ']' && this_context & LC_EXT_LINK_TITLE) return Tokenizer_pop(self); - else if (this == '=' && !(self->global & GL_HEADING)) { + else if (this == '=' && !(self->global & GL_HEADING) && !(this_context & LC_TEMPLATE)) { if (!last || last == '\n') { if (Tokenizer_parse_heading(self)) return NULL; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index d4e6c8c..ab61f92 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1326,7 +1326,10 @@ class Tokenizer: elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and 
self._context & contexts.TEMPLATE_PARAM_KEY: - self._handle_template_param_value() + if not self._global & contexts.GL_HEADING and self._read(-1) in ("\n", self.START) and nxt == "=": + self._parse_heading() + else: + self._handle_template_param_value() elif this == nxt == "}" and self._context & contexts.TEMPLATE: return self._handle_template_end() elif this == "|" and self._context & contexts.ARGUMENT_NAME: @@ -1350,7 +1353,7 @@ class Tokenizer: self._parse_external_link(False) elif this == "]" and self._context & contexts.EXT_LINK_TITLE: return self._pop() - elif this == "=" and not self._global & contexts.GL_HEADING: + elif this == "=" and not self._global & contexts.GL_HEADING and not self._context & contexts.TEMPLATE: if self._read(-1) in ("\n", self.START): self._parse_heading() else: diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index 8d30069..72ba9c7 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -695,3 +695,31 @@ name: recursion_opens_and_closes label: test potentially dangerous recursion: template openings and closings input: "{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}{{x|{{x}}" output: [Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), 
TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose(), Text(text="{{x|"), TemplateOpen(), Text(text="x"), TemplateClose()] + +--- + +name: invalid_section_level_1 +label: level 1 headings inside a template are always invalid +input: "{{foo|bar\n=baz=\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz=\n"), TemplateClose()] + +--- + +name: section_level_2 +label: valid level 2 heading inside a template +input: "{{foo|bar\n==baz==\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), HeadingStart(level=2), Text(text="baz"), HeadingEnd(), Text(text="\n"), TemplateClose()] + +--- + +name: invalid_section_level_2 +label: invalid level 2 heading inside a template +input: "{{foo|bar==baz==\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="=baz==\n"), TemplateClose()] + +--- + +name: section_level_2_after_template_parameter +label: level 2 heading inside a template after a parameter +input: "{{foo|bar=\n==baz==\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="\n==baz==\n"), TemplateClose()] From 895730681a65cdb11ab6cc5971d61ee37d48d2cc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 04:23:36 -0500 Subject: [PATCH 29/31] Update changelog and update AppVeyor config --- CHANGELOG | 3 ++- README.rst | 2 +- appveyor.yml | 2 ++ docs/changelog.rst | 4 +++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 6d0cf86..e599665 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,9 +7,10 @@ Thanks to everyone for their patience with this release! - Added binary wheels for Linux and macOS. 
- Updated Wikicode.matches() to recognize underscores as being equivalent to spaces. (#216) -- Add a 'default' parameter to Template.get, and implement dict-style item +- Added a 'default' parameter to Template.get, and implement dict-style item access for template parameters. (#252) - Fixed a rare parsing bug involving deeply nested style tags. (#224) +- Fixed parsing of section headings inside templates. (#233) - Updated HTML tag definitions. - Internal refactoring and cleanup. diff --git a/README.rst b/README.rst index 45db387..8641103 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ mwparserfromhell ================ -.. image:: https://img.shields.io/travis/earwig/mwparserfromhell/develop.svg +.. image:: https://api.travis-ci.com/earwig/mwparserfromhell.svg?branch=develop :alt: Build Status :target: https://travis-ci.org/earwig/mwparserfromhell diff --git a/appveyor.yml b/appveyor.yml index a39024f..4307169 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -9,6 +9,8 @@ branches: skip_tags: true +image: Visual Studio 2019 + environment: global: # See: http://stackoverflow.com/a/13751649/163740 diff --git a/docs/changelog.rst b/docs/changelog.rst index 59bedc8..afade5f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,11 +15,13 @@ Thanks to everyone for their patience with this release! - Updated :meth:`.Wikicode.matches` to recognize underscores as being equivalent to spaces. (`#216 `_) -- Add a `default` parameter to :meth:`.Template.get`, and implement dict-style +- Added a `default` parameter to :meth:`.Template.get`, and implement dict-style item access for template parameters. (`#252 `_) - Fixed a rare parsing bug involving deeply nested style tags. (`#224 `_) +- Fixed parsing of section headings inside templates. + (`#233 `_) - Updated HTML tag definitions. - Internal refactoring and cleanup. 
From 2721de2447c5a6dd92012fda5ffb0c63cc3629e8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 04:37:20 -0500 Subject: [PATCH 30/31] Drop Python 3.5 Windows wheels It's EOL and AppVeyor builds are broken under it if we want to support Python 3.9. Not fully deprecating 3.5 for now because it's still used on Toolforge. --- appveyor.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 4307169..23286e0 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,14 +24,6 @@ environment: secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ matrix: - - PYTHON: "C:\\Python35" - PYTHON_VERSION: "3.5" - PYTHON_ARCH: "32" - - - PYTHON: "C:\\Python35-x64" - PYTHON_VERSION: "3.5" - PYTHON_ARCH: "64" - - PYTHON: "C:\\Python36" PYTHON_VERSION: "3.6" PYTHON_ARCH: "32" From 0f05dfa54643aa81474c9572590c08d6facdd434 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Dec 2020 04:43:25 -0500 Subject: [PATCH 31/31] release/0.6 --- CHANGELOG | 2 +- appveyor.yml | 2 +- docs/changelog.rst | 4 ++-- mwparserfromhell/__init__.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index e599665..582ab88 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -v0.6 (unreleased): +v0.6 (released December 21, 2020): Thanks to everyone for their patience with this release! diff --git a/appveyor.yml b/appveyor.yml index 23286e0..f7987ab 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ # This config file is used by appveyor.com to build Windows release binaries -version: 0.6.dev0-b{build} +version: 0.6-b{build} branches: only: diff --git a/docs/changelog.rst b/docs/changelog.rst index afade5f..fa6264d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,8 +4,8 @@ Changelog v0.6 ---- -Unreleased -(`changes `__): +`Released December 21, 2020 `_ +(`changes `__): Thanks to everyone for their patience with this release! 
diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 841f992..409335b 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -27,7 +27,7 @@ outrageously powerful parser for `MediaWiki `_ wikico __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012-2020 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.6.dev0" +__version__ = "0.6" __email__ = "ben.kurtovic@gmail.com" from . import (definitions, nodes, parser, smart_list, string_mixin,