@@ -6,4 +6,3 @@ partial_branches = | |||
pragma: no branch | |||
if py3k: | |||
if not py3k: | |||
if py26: |
@@ -1,16 +1,12 @@ | |||
language: python | |||
python: | |||
- 2.6 | |||
- 2.7 | |||
- 3.2 | |||
- 3.3 | |||
- 3.4 | |||
- 3.5 | |||
- 3.6 | |||
- nightly | |||
sudo: false | |||
install: | |||
- if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; then pip install coverage==3.7.1; fi | |||
- pip install coveralls | |||
- python setup.py build | |||
script: | |||
@@ -1,4 +1,13 @@ | |||
v0.5.1 (released March 03, 2018): | |||
v0.5.2 (released November 1, 2018): | |||
- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204) | |||
- Fixed signals getting stuck inside the C tokenizer until parsing finishes, | |||
in pathological cases. (#206) | |||
- Fixed <wbr> not being considered a single-only tag. (#200) | |||
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (#208) | |||
- Cleaned up some minor documentation issues. (#207) | |||
v0.5.1 (released March 3, 2018): | |||
- Improved behavior when adding parameters to templates (via Template.add()) | |||
with poorly formatted whitespace conventions. (#185) | |||
@@ -35,81 +35,81 @@ You can run the comprehensive unit testing suite with | |||
Usage | |||
----- | |||
Normal usage is rather straightforward (where ``text`` is page text):: | |||
Normal usage is rather straightforward (where ``text`` is page text): | |||
>>> import mwparserfromhell | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
>>> import mwparserfromhell | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an | |||
ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods. | |||
For example:: | |||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
>>> print(wikicode) | |||
I has a template! {{foo|bar|baz|eggs=spam}} See it? | |||
>>> templates = wikicode.filter_templates() | |||
>>> print(templates) | |||
['{{foo|bar|baz|eggs=spam}}'] | |||
>>> template = templates[0] | |||
>>> print(template.name) | |||
foo | |||
>>> print(template.params) | |||
['bar', 'baz', 'eggs=spam'] | |||
>>> print(template.get(1).value) | |||
bar | |||
>>> print(template.get("eggs").value) | |||
spam | |||
Since nodes can contain other nodes, getting nested templates is trivial:: | |||
>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
>>> mwparserfromhell.parse(text).filter_templates() | |||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | |||
For example: | |||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
>>> print(wikicode) | |||
I has a template! {{foo|bar|baz|eggs=spam}} See it? | |||
>>> templates = wikicode.filter_templates() | |||
>>> print(templates) | |||
['{{foo|bar|baz|eggs=spam}}'] | |||
>>> template = templates[0] | |||
>>> print(template.name) | |||
foo | |||
>>> print(template.params) | |||
['bar', 'baz', 'eggs=spam'] | |||
>>> print(template.get(1).value) | |||
bar | |||
>>> print(template.get("eggs").value) | |||
spam | |||
Since nodes can contain other nodes, getting nested templates is trivial: | |||
>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
>>> mwparserfromhell.parse(text).filter_templates() | |||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | |||
You can also pass ``recursive=False`` to ``filter_templates()`` and explore | |||
templates manually. This is possible because nodes can contain additional | |||
``Wikicode`` objects:: | |||
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
>>> print(code.filter_templates(recursive=False)) | |||
['{{foo|this {{includes a|template}}}}'] | |||
>>> foo = code.filter_templates(recursive=False)[0] | |||
>>> print(foo.get(1).value) | |||
this {{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0]) | |||
{{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0].get(1).value) | |||
template | |||
``Wikicode`` objects: | |||
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
>>> print(code.filter_templates(recursive=False)) | |||
['{{foo|this {{includes a|template}}}}'] | |||
>>> foo = code.filter_templates(recursive=False)[0] | |||
>>> print(foo.get(1).value) | |||
this {{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0]) | |||
{{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0].get(1).value) | |||
template | |||
Templates can be easily modified to add, remove, or alter params. ``Wikicode`` | |||
objects can be treated like lists, with ``append()``, ``insert()``, | |||
``remove()``, ``replace()``, and more. They also have a ``matches()`` method | |||
for comparing page or template names, which takes care of capitalization and | |||
whitespace:: | |||
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
>>> code = mwparserfromhell.parse(text) | |||
>>> for template in code.filter_templates(): | |||
... if template.name.matches("Cleanup") and not template.has("date"): | |||
... template.add("date", "July 2012") | |||
... | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | |||
>>> code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> print(code.filter_templates()) | |||
['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | |||
whitespace: | |||
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
>>> code = mwparserfromhell.parse(text) | |||
>>> for template in code.filter_templates(): | |||
... if template.name.matches("Cleanup") and not template.has("date"): | |||
... template.add("date", "July 2012") | |||
... | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | |||
>>> code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> print(code.filter_templates()) | |||
['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | |||
You can then convert ``code`` back into a regular ``str`` object (for | |||
saving the page!) by calling ``str()`` on it:: | |||
saving the page!) by calling ``str()`` on it: | |||
>>> text = str(code) | |||
>>> print(text) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> text == code | |||
True | |||
>>> text = str(code) | |||
>>> print(text) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> text == code | |||
True | |||
Likewise, use ``unicode(code)`` in Python 2. | |||
@@ -164,7 +164,9 @@ Integration | |||
``Page`` objects have a ``parse`` method that essentially calls | |||
``mwparserfromhell.parse()`` on ``page.get()``. | |||
If you're using Pywikibot_, your code might look like this:: | |||
If you're using Pywikibot_, your code might look like this: | |||
.. code-block:: python | |||
import mwparserfromhell | |||
import pywikibot | |||
@@ -176,7 +178,9 @@ If you're using Pywikibot_, your code might look like this:: | |||
return mwparserfromhell.parse(text) | |||
If you're not using a library, you can parse any page using the following | |||
Python 3 code (via the API_):: | |||
Python 3 code (via the API_): | |||
.. code-block:: python | |||
import json | |||
from urllib.parse import urlencode | |||
@@ -189,11 +193,11 @@ Python 3 code (via the API_):: | |||
"rvprop": "content", "format": "json", "titles": title} | |||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
return mwparserfromhell.parse(text) | |||
.. _MediaWiki: http://mediawiki.org | |||
.. _ReadTheDocs: http://mwparserfromhell.readthedocs.org | |||
.. _ReadTheDocs: http://mwparserfromhell.readthedocs.io | |||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||
.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm | |||
@@ -1,6 +1,6 @@ | |||
# This config file is used by appveyor.com to build Windows release binaries | |||
version: 0.5.1-b{build} | |||
version: 0.5.2-b{build} | |||
branches: | |||
only: | |||
@@ -13,8 +13,9 @@ environment: | |||
global: | |||
# See: http://stackoverflow.com/a/13751649/163740 | |||
WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd" | |||
PIP: "%WRAPPER% %PYTHON%\\Scripts\\pip.exe" | |||
SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension" | |||
PIP: "%WRAPPER% %PYTHON%\\python.exe -m pip" | |||
SETUPPY: "%WRAPPER% %PYTHON%\\python.exe setup.py --with-extension" | |||
TWINE: "%WRAPPER% %PYTHON%\\python.exe -m twine" | |||
PYPI_USERNAME: "earwigbot" | |||
PYPI_PASSWORD: | |||
secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ | |||
@@ -28,14 +29,6 @@ environment: | |||
PYTHON_VERSION: "2.7" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python33" | |||
PYTHON_VERSION: "3.3" | |||
PYTHON_ARCH: "32" | |||
- PYTHON: "C:\\Python33-x64" | |||
PYTHON_VERSION: "3.3" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python34" | |||
PYTHON_VERSION: "3.4" | |||
PYTHON_ARCH: "32" | |||
@@ -60,6 +53,14 @@ environment: | |||
PYTHON_VERSION: "3.6" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python37" | |||
PYTHON_VERSION: "3.7" | |||
PYTHON_ARCH: "32" | |||
- PYTHON: "C:\\Python37-x64" | |||
PYTHON_VERSION: "3.7" | |||
PYTHON_ARCH: "64" | |||
install: | |||
- "%PIP% install --disable-pip-version-check --user --upgrade pip" | |||
- "%PIP% install wheel twine" | |||
@@ -74,7 +75,7 @@ after_test: | |||
- "%SETUPPY% bdist_wheel" | |||
on_success: | |||
- "IF %APPVEYOR_REPO_BRANCH%==master %WRAPPER% %PYTHON%\\python -m twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" | |||
- "IF %APPVEYOR_REPO_BRANCH%==master %TWINE% upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" | |||
artifacts: | |||
- path: dist\* | |||
@@ -1,10 +1,29 @@ | |||
Changelog | |||
========= | |||
v0.5.2 | |||
------ | |||
`Released November 1, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.2>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.1...v0.5.2>`__): | |||
- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. | |||
(`#199 <https://github.com/earwig/mwparserfromhell/issues/199>`_, | |||
`#204 <https://github.com/earwig/mwparserfromhell/pull/204>`_) | |||
- Fixed signals getting stuck inside the C tokenizer until parsing finishes, | |||
in pathological cases. | |||
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_) | |||
- Fixed ``<wbr>`` not being considered a single-only tag. | |||
(`#200 <https://github.com/earwig/mwparserfromhell/pull/200>`_) | |||
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. | |||
(`#208 <https://github.com/earwig/mwparserfromhell/issues/208>`_) | |||
- Cleaned up some minor documentation issues. | |||
(`#207 <https://github.com/earwig/mwparserfromhell/pull/207>`_) | |||
v0.5.1 | |||
------ | |||
`Released March 03, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_ | |||
`Released March 3, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...v0.5.1>`__): | |||
- Improved behavior when adding parameters to templates (via | |||
@@ -32,7 +32,7 @@ If you're not using a library, you can parse any page using the following code | |||
"rvprop": "content", "format": "json", "titles": title} | |||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
return mwparserfromhell.parse(text) | |||
.. _EarwigBot: https://github.com/earwig/earwigbot | |||
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
__version__ = "0.5.1" | |||
__version__ = "0.5.2" | |||
__email__ = "ben.kurtovic@gmail.com" | |||
from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | |||
@@ -10,9 +10,7 @@ meant to be imported directly from within the parser's modules. | |||
import sys | |||
py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6) | |||
py3k = (sys.version_info[0] == 3) | |||
py32 = py3k and (sys.version_info[1] == 2) | |||
if py3k: | |||
bytes = bytes | |||
@@ -56,8 +56,8 @@ INVISIBLE_TAGS = [ | |||
"section", "templatedata", "timeline" | |||
] | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 065bec63ea | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img", "wbr"] | |||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] | |||
MARKUP_TO_HTML = { | |||
@@ -40,7 +40,7 @@ class Attribute(StringMixIn): | |||
pad_before_eq="", pad_after_eq="", check_quotes=True): | |||
super(Attribute, self).__init__() | |||
if check_quotes and not quotes and self._value_needs_quotes(value): | |||
raise ValueError("given value {0!r} requires quotes".format(value)) | |||
raise ValueError("given value {!r} requires quotes".format(value)) | |||
self._name = name | |||
self._value = value | |||
self._quotes = quotes | |||
@@ -79,7 +79,7 @@ class Attribute(StringMixIn): | |||
"""Coerce a quote type into an acceptable value, or raise an error.""" | |||
orig, quotes = quotes, str(quotes) if quotes else None | |||
if quotes not in [None, '"', "'"]: | |||
raise ValueError("{0!r} is not a valid quote type".format(orig)) | |||
raise ValueError("{!r} is not a valid quote type".format(orig)) | |||
return quotes | |||
@property | |||
@@ -41,7 +41,7 @@ class Parameter(StringMixIn): | |||
def __init__(self, name, value, showkey=True): | |||
super(Parameter, self).__init__() | |||
if not showkey and not self.can_hide_key(name): | |||
raise ValueError("key {0!r} cannot be hidden".format(name)) | |||
raise ValueError("key {!r} cannot be hidden".format(name)) | |||
self._name = name | |||
self._value = value | |||
self._showkey = showkey | |||
@@ -53,10 +53,10 @@ class HTMLEntity(Node): | |||
def __unicode__(self): | |||
if self.named: | |||
return "&{0};".format(self.value) | |||
return "&{};".format(self.value) | |||
if self.hexadecimal: | |||
return "&#{0}{1};".format(self.hex_char, self.value) | |||
return "&#{0};".format(self.value) | |||
return "&#{}{};".format(self.hex_char, self.value) | |||
return "&#{};".format(self.value) | |||
def __strip__(self, **kwargs): | |||
if kwargs.get("normalize"): | |||
@@ -35,7 +35,7 @@ class ParserError(Exception): | |||
can happen. Its appearance indicates a bug. | |||
""" | |||
def __init__(self, extra): | |||
msg = "This is a bug and should be reported. Info: {0}.".format(extra) | |||
msg = "This is a bug and should be reported. Info: {}.".format(extra) | |||
super(ParserError, self).__init__(msg) | |||
@@ -45,11 +45,12 @@ static const char* PARSER_BLACKLIST[] = { | |||
}; | |||
static const char* SINGLE[] = { | |||
"br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL | |||
"br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", | |||
"wbr", NULL | |||
}; | |||
static const char* SINGLE_ONLY[] = { | |||
"br", "hr", "meta", "link", "img", NULL | |||
"br", "hr", "meta", "link", "img", "wbr", NULL | |||
}; | |||
/* | |||
@@ -2603,6 +2603,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||
} | |||
if (!this) | |||
return Tokenizer_handle_end(self, this_context); | |||
if (PyErr_CheckSignals()) | |||
return NULL; | |||
next = Tokenizer_read(self, 1); | |||
last = Tokenizer_read_backwards(self, 1); | |||
if (this == next && next == '{') { | |||
@@ -207,7 +207,7 @@ static int load_entities(void) | |||
if (!deflist) | |||
return -1; | |||
Py_DECREF(defmap); | |||
numdefs = (unsigned) PyList_GET_SIZE(defmap); | |||
numdefs = (unsigned) PyList_GET_SIZE(deflist); | |||
entitydefs = calloc(numdefs + 1, sizeof(char*)); | |||
if (!entitydefs) | |||
return -1; | |||
@@ -44,7 +44,7 @@ class Token(dict): | |||
args.append(key + "=" + repr(value[:97] + "...")) | |||
else: | |||
args.append(key + "=" + repr(value)) | |||
return "{0}({1})".format(type(self).__name__, ", ".join(args)) | |||
return "{}({})".format(type(self).__name__, ", ".join(args)) | |||
def __eq__(self, other): | |||
return isinstance(other, type(self)) and dict.__eq__(self, other) | |||
@@ -28,7 +28,7 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from .compat import bytes, py26, py3k, str | |||
from .compat import bytes, py3k, str | |||
__all__ = ["StringMixIn"] | |||
@@ -109,21 +109,12 @@ class StringMixIn(object): | |||
def __getattr__(self, attr): | |||
if not hasattr(str, attr): | |||
raise AttributeError("{0!r} object has no attribute {1!r}".format( | |||
raise AttributeError("{!r} object has no attribute {!r}".format( | |||
type(self).__name__, attr)) | |||
return getattr(self.__unicode__(), attr) | |||
if py3k: | |||
maketrans = str.maketrans # Static method can't rely on __getattr__ | |||
if py26: | |||
@inheritdoc | |||
def encode(self, encoding=None, errors=None): | |||
if encoding is None: | |||
encoding = getdefaultencoding() | |||
if errors is not None: | |||
return self.__unicode__().encode(encoding, errors) | |||
return self.__unicode__().encode(encoding) | |||
del inheritdoc |
@@ -80,7 +80,7 @@ class MemoryTest(object): | |||
raw = raw.encode("raw_unicode_escape") | |||
data["input"] = raw.decode("unicode_escape") | |||
number = str(counter).zfill(digits) | |||
fname = "test_{0}{1}_{2}".format(name, number, data["name"]) | |||
fname = "test_{}{}_{}".format(name, number, data["name"]) | |||
self._tests.append((fname, data["input"])) | |||
counter += 1 | |||
@@ -117,7 +117,7 @@ class MemoryTest(object): | |||
tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)" | |||
sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt)) | |||
else: | |||
sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET)) | |||
sys.stdout.write("{}OK{}".format(Color.GREEN, Color.RESET)) | |||
def run(self): | |||
"""Run the memory test suite.""" | |||
@@ -9,7 +9,7 @@ fi | |||
VERSION=$1 | |||
SCRIPT_DIR=$(dirname "$0") | |||
RELEASE_DATE=$(date +"%B %d, %Y") | |||
RELEASE_DATE=$(date +"%B %-d, %Y") | |||
check_git() { | |||
if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then | |||
@@ -76,9 +76,8 @@ do_git_stuff() { | |||
} | |||
upload_to_pypi() { | |||
echo -n "PyPI: uploading source tarball and docs..." | |||
echo -n "PyPI: uploading source tarball..." | |||
python setup.py -q register sdist upload -s | |||
python setup.py -q upload_docs | |||
echo " done." | |||
} | |||
@@ -88,7 +87,7 @@ post_release() { | |||
echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" | |||
echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" | |||
echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell" | |||
echo "*** Verify: https://mwparserfromhell.readthedocs.org" | |||
echo "*** Verify: https://mwparserfromhell.readthedocs.io" | |||
echo "*** Press enter to sanity-check the release." | |||
read | |||
} | |||
@@ -27,15 +27,15 @@ from glob import glob | |||
from os import environ | |||
import sys | |||
if ((sys.version_info[0] == 2 and sys.version_info[1] < 6) or | |||
(sys.version_info[1] == 3 and sys.version_info[1] < 2)): | |||
raise RuntimeError("mwparserfromhell needs Python 2.6+ or 3.2+") | |||
def check_python_version(version_info=sys.version_info):
    """Raise RuntimeError unless the interpreter is Python 2.7 or 3.4+.

    version_info is any sequence whose first two items are the major and
    minor version numbers; it defaults to the running interpreter's
    ``sys.version_info``.
    """
    major, minor = version_info[0], version_info[1]
    # Compare the *major* version in both clauses. Testing the minor version
    # against 3 (as this guard previously did) let Python 3.0-3.2 through.
    if (major == 2 and minor < 7) or (major == 3 and minor < 4):
        raise RuntimeError("mwparserfromhell needs Python 2.7 or 3.4+")


# Fail fast, before setuptools or the package itself are imported.
check_python_version()
from setuptools import setup, find_packages, Extension | |||
from setuptools.command.build_ext import build_ext | |||
from mwparserfromhell import __version__ | |||
from mwparserfromhell.compat import py26, py3k | |||
from mwparserfromhell.compat import py3k | |||
with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | |||
long_docs = fp.read() | |||
@@ -76,21 +76,20 @@ if fallback: | |||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||
sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")), | |||
depends=glob("mwparserfromhell/parser/ctokenizer/*.h")) | |||
depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h"))) | |||
setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||
ext_modules = [tokenizer] if use_extension else [], | |||
tests_require = ["unittest2"] if py26 else [], | |||
test_suite = "tests.discover", | |||
test_suite = "tests", | |||
version = __version__, | |||
author = "Ben Kurtovic", | |||
author_email = "ben.kurtovic@gmail.com", | |||
url = "https://github.com/earwig/mwparserfromhell", | |||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | |||
long_description = long_docs, | |||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__), | |||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__), | |||
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", | |||
license = "MIT License", | |||
classifiers = [ | |||
@@ -99,11 +98,9 @@ setup( | |||
"Intended Audience :: Developers", | |||
"License :: OSI Approved :: MIT License", | |||
"Operating System :: OS Independent", | |||
"Programming Language :: Python :: 2.6", | |||
"Programming Language :: Python :: 2", | |||
"Programming Language :: Python :: 2.7", | |||
"Programming Language :: Python :: 3", | |||
"Programming Language :: Python :: 3.2", | |||
"Programming Language :: Python :: 3.3", | |||
"Programming Language :: Python :: 3.4", | |||
"Programming Language :: Python :: 3.5", | |||
"Programming Language :: Python :: 3.6", | |||
@@ -109,7 +109,7 @@ class TokenizerTestCase(object): | |||
print(error.format(filename)) | |||
continue | |||
if data["input"] is None or data["output"] is None: | |||
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" | |||
error = "Test '{}' in '{}' was ignored because it lacked an input or an output" | |||
print(error.format(data["name"], filename)) | |||
continue | |||
@@ -118,7 +118,7 @@ class TokenizerTestCase(object): | |||
if restrict and data["name"] != restrict: | |||
continue | |||
fname = "test_{0}{1}_{2}".format(name, number, data["name"]) | |||
fname = "test_{}{}_{}".format(name, number, data["name"]) | |||
meth = cls._build_test_method(fname, data) | |||
setattr(cls, fname, meth) | |||
@@ -126,7 +126,7 @@ class TokenizerTestCase(object): | |||
def build(cls): | |||
"""Load and install all tests from the 'tokenizer' directory.""" | |||
def load_file(filename, restrict=None): | |||
with codecs.open(filename, "rU", encoding="utf8") as fp: | |||
with codecs.open(filename, "r", encoding="utf8") as fp: | |||
text = fp.read() | |||
name = path.split(filename)[1][:-len(extension)] | |||
cls._load_tests(filename, name, text, restrict) | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
from unittest2 import TestCase | |||
except ImportError: | |||
from unittest import TestCase | |||
from unittest import TestCase | |||
from mwparserfromhell.compat import range | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
@@ -1,24 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Discover tests using ``unittest2`` for Python 2.6. | |||
It appears the default distutils test suite doesn't play nice with | |||
``setUpClass`` thereby making some tests fail. Using ``unittest2`` to load | |||
tests seems to work around that issue. | |||
http://stackoverflow.com/a/17004409/753501 | |||
""" | |||
import os.path | |||
from mwparserfromhell.compat import py26 | |||
if py26: | |||
import unittest2 as unittest | |||
else: | |||
import unittest | |||
def additional_tests():
    """Return the suite of all tests discoverable from the project root.

    The project root is the parent of the directory that contains this file.
    """
    here = os.path.dirname(__file__)
    root, _ = os.path.split(here)
    loader = unittest.defaultTestLoader
    return loader.discover(root)
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Argument, Text | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Template | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Comment | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
try: | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
@@ -23,11 +23,7 @@ | |||
from __future__ import print_function, unicode_literals | |||
import json | |||
import os | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
import mwparserfromhell | |||
from mwparserfromhell.compat import py3k, str | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import ExternalLink, Text | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Heading, Text | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import HTMLEntity | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Text | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell import parser | |||
from mwparserfromhell.compat import range | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.parser.tokenizer import Tokenizer | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from ._test_tokenizer import TokenizerTestCase | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import py3k, range | |||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||
@@ -139,36 +135,36 @@ class TestSmartList(unittest.TestCase): | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) | |||
self.assertTrue(list1 < list3) | |||
self.assertTrue(list1 <= list3) | |||
self.assertFalse(list1 == list3) | |||
self.assertTrue(list1 != list3) | |||
self.assertFalse(list1 > list3) | |||
self.assertFalse(list1 >= list3) | |||
self.assertLess(list1, list3) | |||
self.assertLessEqual(list1, list3) | |||
self.assertNotEqual(list1, list3) | |||
self.assertNotEqual(list1, list3) | |||
self.assertLessEqual(list1, list3) | |||
self.assertLess(list1, list3) | |||
other1 = [0, 2, 3, 4] | |||
self.assertTrue(list1 < other1) | |||
self.assertTrue(list1 <= other1) | |||
self.assertFalse(list1 == other1) | |||
self.assertTrue(list1 != other1) | |||
self.assertFalse(list1 > other1) | |||
self.assertFalse(list1 >= other1) | |||
self.assertLess(list1, other1) | |||
self.assertLessEqual(list1, other1) | |||
self.assertNotEqual(list1, other1) | |||
self.assertNotEqual(list1, other1) | |||
self.assertLessEqual(list1, other1) | |||
self.assertLess(list1, other1) | |||
other2 = [0, 0, 1, 2] | |||
self.assertFalse(list1 < other2) | |||
self.assertFalse(list1 <= other2) | |||
self.assertFalse(list1 == other2) | |||
self.assertTrue(list1 != other2) | |||
self.assertTrue(list1 > other2) | |||
self.assertTrue(list1 >= other2) | |||
self.assertGreaterEqual(list1, other2) | |||
self.assertGreater(list1, other2) | |||
self.assertNotEqual(list1, other2) | |||
self.assertNotEqual(list1, other2) | |||
self.assertGreater(list1, other2) | |||
self.assertGreaterEqual(list1, other2) | |||
other3 = [0, 1, 2, 3, "one", "two"] | |||
self.assertFalse(list1 < other3) | |||
self.assertTrue(list1 <= other3) | |||
self.assertTrue(list1 == other3) | |||
self.assertFalse(list1 != other3) | |||
self.assertFalse(list1 > other3) | |||
self.assertTrue(list1 >= other3) | |||
self.assertGreaterEqual(list1, other3) | |||
self.assertLessEqual(list1, other3) | |||
self.assertEqual(list1, other3) | |||
self.assertEqual(list1, other3) | |||
self.assertLessEqual(list1, other3) | |||
self.assertGreaterEqual(list1, other3) | |||
self.assertTrue(bool(list1)) | |||
self.assertFalse(bool(list2)) | |||
@@ -198,10 +194,10 @@ class TestSmartList(unittest.TestCase): | |||
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) | |||
self.assertEqual([], list(reversed(list2))) | |||
self.assertTrue("one" in list1) | |||
self.assertTrue(3 in list1) | |||
self.assertFalse(10 in list1) | |||
self.assertFalse(0 in list2) | |||
self.assertIn("one", list1) | |||
self.assertIn(3, list1) | |||
self.assertNotIn(10, list1) | |||
self.assertNotIn(0, list2) | |||
self.assertEqual([], list2 * 5) | |||
self.assertEqual([], 5 * list2) | |||
@@ -23,13 +23,9 @@ | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from types import GeneratorType | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import bytes, py3k, py32, range, str | |||
from mwparserfromhell.compat import bytes, py3k, range, str | |||
from mwparserfromhell.string_mixin import StringMixIn | |||
class _FakeString(StringMixIn): | |||
@@ -54,9 +50,7 @@ class TestStringMixIn(unittest.TestCase): | |||
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip", | |||
"swapcase", "title", "translate", "upper", "zfill"] | |||
if py3k: | |||
if not py32: | |||
methods.append("casefold") | |||
methods.extend(["format_map", "isidentifier", "isprintable", | |||
methods.extend(["casefold", "format_map", "isidentifier", "isprintable", | |||
"maketrans"]) | |||
else: | |||
methods.append("decode") | |||
@@ -90,33 +84,33 @@ class TestStringMixIn(unittest.TestCase): | |||
str4 = "this is a fake string" | |||
str5 = "fake string, this is" | |||
self.assertFalse(str1 > str2) | |||
self.assertTrue(str1 >= str2) | |||
self.assertTrue(str1 == str2) | |||
self.assertFalse(str1 != str2) | |||
self.assertFalse(str1 < str2) | |||
self.assertTrue(str1 <= str2) | |||
self.assertTrue(str1 > str3) | |||
self.assertTrue(str1 >= str3) | |||
self.assertFalse(str1 == str3) | |||
self.assertTrue(str1 != str3) | |||
self.assertFalse(str1 < str3) | |||
self.assertFalse(str1 <= str3) | |||
self.assertFalse(str1 > str4) | |||
self.assertTrue(str1 >= str4) | |||
self.assertTrue(str1 == str4) | |||
self.assertFalse(str1 != str4) | |||
self.assertFalse(str1 < str4) | |||
self.assertTrue(str1 <= str4) | |||
self.assertFalse(str5 > str1) | |||
self.assertFalse(str5 >= str1) | |||
self.assertFalse(str5 == str1) | |||
self.assertTrue(str5 != str1) | |||
self.assertTrue(str5 < str1) | |||
self.assertTrue(str5 <= str1) | |||
self.assertLessEqual(str1, str2) | |||
self.assertGreaterEqual(str1, str2) | |||
self.assertEqual(str1, str2) | |||
self.assertEqual(str1, str2) | |||
self.assertGreaterEqual(str1, str2) | |||
self.assertLessEqual(str1, str2) | |||
self.assertGreater(str1, str3) | |||
self.assertGreaterEqual(str1, str3) | |||
self.assertNotEqual(str1, str3) | |||
self.assertNotEqual(str1, str3) | |||
self.assertGreaterEqual(str1, str3) | |||
self.assertGreater(str1, str3) | |||
self.assertLessEqual(str1, str4) | |||
self.assertGreaterEqual(str1, str4) | |||
self.assertEqual(str1, str4) | |||
self.assertEqual(str1, str4) | |||
self.assertGreaterEqual(str1, str4) | |||
self.assertLessEqual(str1, str4) | |||
self.assertLessEqual(str5, str1) | |||
self.assertLess(str5, str1) | |||
self.assertNotEqual(str5, str1) | |||
self.assertNotEqual(str5, str1) | |||
self.assertLess(str5, str1) | |||
self.assertLessEqual(str5, str1) | |||
def test_other_magics(self): | |||
"""test other magically implemented features, like len() and iter()""" | |||
@@ -161,13 +155,13 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertRaises(IndexError, lambda: str1[11]) | |||
self.assertRaises(IndexError, lambda: str2[0]) | |||
self.assertTrue("k" in str1) | |||
self.assertTrue("fake" in str1) | |||
self.assertTrue("str" in str1) | |||
self.assertTrue("" in str1) | |||
self.assertTrue("" in str2) | |||
self.assertFalse("real" in str1) | |||
self.assertFalse("s" in str2) | |||
self.assertIn("k", str1) | |||
self.assertIn("fake", str1) | |||
self.assertIn("str", str1) | |||
self.assertIn("", str1) | |||
self.assertIn("", str2) | |||
self.assertNotIn("real", str1) | |||
self.assertNotIn("s", str2) | |||
def test_other_methods(self): | |||
"""test the remaining non-magic methods of StringMixIn""" | |||
@@ -329,7 +323,7 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual("", str15.lower()) | |||
self.assertEqual("foobar", str16.lower()) | |||
self.assertEqual("ß", str22.lower()) | |||
if py3k and not py32: | |||
if py3k: | |||
self.assertEqual("", str15.casefold()) | |||
self.assertEqual("foobar", str16.casefold()) | |||
self.assertEqual("ss", str22.casefold()) | |||
@@ -378,7 +372,7 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual(actual, str25.rsplit(None, 3)) | |||
actual = [" this is a sentence with", "", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ", 3)) | |||
if py3k and not py32: | |||
if py3k: | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||
@@ -396,7 +390,7 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual(actual, str25.split(None, 3)) | |||
actual = ["", "", "", "this is a sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(" ", 3)) | |||
if py3k and not py32: | |||
if py3k: | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Tag, Template, Text | |||
@@ -22,11 +22,7 @@ | |||
from __future__ import unicode_literals | |||
from difflib import unified_diff | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import HTMLEntity, Template, Text | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Text | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser import tokens | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.nodes import Template, Text | |||
from mwparserfromhell.utils import parse_anything | |||
@@ -24,11 +24,7 @@ from __future__ import unicode_literals | |||
from functools import partial | |||
import re | |||
from types import GeneratorType | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import py3k, str | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
@@ -21,11 +21,7 @@ | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Text, Wikilink | |||