Просмотр исходного кода

Merge develop into master (release/0.5.2)

undefined
Ben Kurtovic 5 лет назад
Родитель
Коммит
623a895cb8
47 измененных файлов: 237 добавлений и 339 удалений
  1. +0
    -1
      .coveragerc
  2. +0
    -4
      .travis.yml
  3. +10
    -1
      CHANGELOG
  4. +68
    -64
      README.rst
  5. +13
    -12
      appveyor.yml
  6. +20
    -1
      docs/changelog.rst
  7. +1
    -1
      docs/integration.rst
  8. +1
    -1
      mwparserfromhell/__init__.py
  9. +0
    -2
      mwparserfromhell/compat.py
  10. +2
    -2
      mwparserfromhell/definitions.py
  11. +2
    -2
      mwparserfromhell/nodes/extras/attribute.py
  12. +1
    -1
      mwparserfromhell/nodes/extras/parameter.py
  13. +3
    -3
      mwparserfromhell/nodes/html_entity.py
  14. +1
    -1
      mwparserfromhell/parser/__init__.py
  15. +3
    -2
      mwparserfromhell/parser/ctokenizer/definitions.c
  16. +2
    -0
      mwparserfromhell/parser/ctokenizer/tok_parse.c
  17. +1
    -1
      mwparserfromhell/parser/ctokenizer/tokenizer.c
  18. +1
    -1
      mwparserfromhell/parser/tokens.py
  19. +2
    -11
      mwparserfromhell/string_mixin.py
  20. +2
    -2
      scripts/memtest.py
  21. +3
    -4
      scripts/release.sh
  22. +8
    -11
      setup.py
  23. +3
    -3
      tests/_test_tokenizer.py
  24. +1
    -5
      tests/_test_tree_equality.py
  25. +0
    -24
      tests/discover.py
  26. +1
    -5
      tests/test_argument.py
  27. +1
    -5
      tests/test_attribute.py
  28. +1
    -5
      tests/test_builder.py
  29. +1
    -5
      tests/test_comment.py
  30. +1
    -5
      tests/test_ctokenizer.py
  31. +1
    -5
      tests/test_docs.py
  32. +1
    -5
      tests/test_external_link.py
  33. +1
    -5
      tests/test_heading.py
  34. +1
    -5
      tests/test_html_entity.py
  35. +1
    -5
      tests/test_parameter.py
  36. +1
    -5
      tests/test_parser.py
  37. +1
    -5
      tests/test_pytokenizer.py
  38. +1
    -5
      tests/test_roundtripping.py
  39. +29
    -33
      tests/test_smart_list.py
  40. +40
    -46
      tests/test_string_mixin.py
  41. +1
    -5
      tests/test_tag.py
  42. +1
    -5
      tests/test_template.py
  43. +1
    -5
      tests/test_text.py
  44. +1
    -5
      tests/test_tokens.py
  45. +1
    -5
      tests/test_utils.py
  46. +1
    -5
      tests/test_wikicode.py
  47. +1
    -5
      tests/test_wikilink.py

+ 0
- 1
.coveragerc Просмотреть файл

@@ -6,4 +6,3 @@ partial_branches =
pragma: no branch
if py3k:
if not py3k:
if py26:

+ 0
- 4
.travis.yml Просмотреть файл

@@ -1,16 +1,12 @@
language: python
python:
- 2.6
- 2.7
- 3.2
- 3.3
- 3.4
- 3.5
- 3.6
- nightly
sudo: false
install:
- if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; then pip install coverage==3.7.1; fi
- pip install coveralls
- python setup.py build
script:


+ 10
- 1
CHANGELOG Просмотреть файл

@@ -1,4 +1,13 @@
v0.5.1 (released March 03, 2018):
v0.5.2 (released November 1, 2018):

- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204)
- Fixed signals getting stuck inside the C tokenizer until parsing finishes,
in pathological cases. (#206)
- Fixed <wbr> not being considered a single-only tag. (#200)
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (#208)
- Cleaned up some minor documentation issues. (#207)

v0.5.1 (released March 3, 2018):

- Improved behavior when adding parameters to templates (via Template.add())
with poorly formatted whitespace conventions. (#185)


+ 68
- 64
README.rst Просмотреть файл

@@ -35,81 +35,81 @@ You can run the comprehensive unit testing suite with
Usage
-----

Normal usage is rather straightforward (where ``text`` is page text)::
Normal usage is rather straightforward (where ``text`` is page text):

>>> import mwparserfromhell
>>> wikicode = mwparserfromhell.parse(text)
>>> import mwparserfromhell
>>> wikicode = mwparserfromhell.parse(text)

``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an
ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods.
For example::
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
>>> wikicode = mwparserfromhell.parse(text)
>>> print(wikicode)
I has a template! {{foo|bar|baz|eggs=spam}} See it?
>>> templates = wikicode.filter_templates()
>>> print(templates)
['{{foo|bar|baz|eggs=spam}}']
>>> template = templates[0]
>>> print(template.name)
foo
>>> print(template.params)
['bar', 'baz', 'eggs=spam']
>>> print(template.get(1).value)
bar
>>> print(template.get("eggs").value)
spam
Since nodes can contain other nodes, getting nested templates is trivial::
>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"
>>> mwparserfromhell.parse(text).filter_templates()
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']
For example:
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
>>> wikicode = mwparserfromhell.parse(text)
>>> print(wikicode)
I has a template! {{foo|bar|baz|eggs=spam}} See it?
>>> templates = wikicode.filter_templates()
>>> print(templates)
['{{foo|bar|baz|eggs=spam}}']
>>> template = templates[0]
>>> print(template.name)
foo
>>> print(template.params)
['bar', 'baz', 'eggs=spam']
>>> print(template.get(1).value)
bar
>>> print(template.get("eggs").value)
spam
Since nodes can contain other nodes, getting nested templates is trivial:
>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"
>>> mwparserfromhell.parse(text).filter_templates()
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']

You can also pass ``recursive=False`` to ``filter_templates()`` and explore
templates manually. This is possible because nodes can contain additional
``Wikicode`` objects::
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
>>> print(code.filter_templates(recursive=False))
['{{foo|this {{includes a|template}}}}']
>>> foo = code.filter_templates(recursive=False)[0]
>>> print(foo.get(1).value)
this {{includes a|template}}
>>> print(foo.get(1).value.filter_templates()[0])
{{includes a|template}}
>>> print(foo.get(1).value.filter_templates()[0].get(1).value)
template
``Wikicode`` objects:
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
>>> print(code.filter_templates(recursive=False))
['{{foo|this {{includes a|template}}}}']
>>> foo = code.filter_templates(recursive=False)[0]
>>> print(foo.get(1).value)
this {{includes a|template}}
>>> print(foo.get(1).value.filter_templates()[0])
{{includes a|template}}
>>> print(foo.get(1).value.filter_templates()[0].get(1).value)
template

Templates can be easily modified to add, remove, or alter params. ``Wikicode``
objects can be treated like lists, with ``append()``, ``insert()``,
``remove()``, ``replace()``, and more. They also have a ``matches()`` method
for comparing page or template names, which takes care of capitalization and
whitespace::
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"
>>> code = mwparserfromhell.parse(text)
>>> for template in code.filter_templates():
... if template.name.matches("Cleanup") and not template.has("date"):
... template.add("date", "July 2012")
...
>>> print(code)
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}
>>> code.replace("{{uncategorized}}", "{{bar-stub}}")
>>> print(code)
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
>>> print(code.filter_templates())
['{{cleanup|date=July 2012}}', '{{bar-stub}}']
whitespace:
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"
>>> code = mwparserfromhell.parse(text)
>>> for template in code.filter_templates():
... if template.name.matches("Cleanup") and not template.has("date"):
... template.add("date", "July 2012")
...
>>> print(code)
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}
>>> code.replace("{{uncategorized}}", "{{bar-stub}}")
>>> print(code)
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
>>> print(code.filter_templates())
['{{cleanup|date=July 2012}}', '{{bar-stub}}']

You can then convert ``code`` back into a regular ``str`` object (for
saving the page!) by calling ``str()`` on it::
saving the page!) by calling ``str()`` on it:

>>> text = str(code)
>>> print(text)
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
>>> text == code
True
>>> text = str(code)
>>> print(text)
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}
>>> text == code
True

Likewise, use ``unicode(code)`` in Python 2.

@@ -164,7 +164,9 @@ Integration
``Page`` objects have a ``parse`` method that essentially calls
``mwparserfromhell.parse()`` on ``page.get()``.

If you're using Pywikibot_, your code might look like this::
If you're using Pywikibot_, your code might look like this:

.. code-block:: python

import mwparserfromhell
import pywikibot
@@ -176,7 +178,9 @@ If you're using Pywikibot_, your code might look like this::
return mwparserfromhell.parse(text)

If you're not using a library, you can parse any page using the following
Python 3 code (via the API_)::
Python 3 code (via the API_):

.. code-block:: python

import json
from urllib.parse import urlencode
@@ -189,11 +193,11 @@ Python 3 code (via the API_)::
"rvprop": "content", "format": "json", "titles": title}
raw = urlopen(API_URL, urlencode(data).encode()).read()
res = json.loads(raw)
text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
return mwparserfromhell.parse(text)

.. _MediaWiki: http://mediawiki.org
.. _ReadTheDocs: http://mwparserfromhell.readthedocs.org
.. _ReadTheDocs: http://mwparserfromhell.readthedocs.io
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3
.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm


+ 13
- 12
appveyor.yml Просмотреть файл

@@ -1,6 +1,6 @@
# This config file is used by appveyor.com to build Windows release binaries

version: 0.5.1-b{build}
version: 0.5.2-b{build}

branches:
only:
@@ -13,8 +13,9 @@ environment:
global:
# See: http://stackoverflow.com/a/13751649/163740
WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd"
PIP: "%WRAPPER% %PYTHON%\\Scripts\\pip.exe"
SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension"
PIP: "%WRAPPER% %PYTHON%\\python.exe -m pip"
SETUPPY: "%WRAPPER% %PYTHON%\\python.exe setup.py --with-extension"
TWINE: "%WRAPPER% %PYTHON%\\python.exe -m twine"
PYPI_USERNAME: "earwigbot"
PYPI_PASSWORD:
secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+
@@ -28,14 +29,6 @@ environment:
PYTHON_VERSION: "2.7"
PYTHON_ARCH: "64"

- PYTHON: "C:\\Python33"
PYTHON_VERSION: "3.3"
PYTHON_ARCH: "32"

- PYTHON: "C:\\Python33-x64"
PYTHON_VERSION: "3.3"
PYTHON_ARCH: "64"

- PYTHON: "C:\\Python34"
PYTHON_VERSION: "3.4"
PYTHON_ARCH: "32"
@@ -60,6 +53,14 @@ environment:
PYTHON_VERSION: "3.6"
PYTHON_ARCH: "64"

- PYTHON: "C:\\Python37"
PYTHON_VERSION: "3.7"
PYTHON_ARCH: "32"

- PYTHON: "C:\\Python37-x64"
PYTHON_VERSION: "3.7"
PYTHON_ARCH: "64"

install:
- "%PIP% install --disable-pip-version-check --user --upgrade pip"
- "%PIP% install wheel twine"
@@ -74,7 +75,7 @@ after_test:
- "%SETUPPY% bdist_wheel"

on_success:
- "IF %APPVEYOR_REPO_BRANCH%==master %WRAPPER% %PYTHON%\\python -m twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%"
- "IF %APPVEYOR_REPO_BRANCH%==master %TWINE% upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%"

artifacts:
- path: dist\*


+ 20
- 1
docs/changelog.rst Просмотреть файл

@@ -1,10 +1,29 @@
Changelog
=========

v0.5.2
------

`Released November 1, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.2>`_
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.1...v0.5.2>`__):

- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3.
(`#199 <https://github.com/earwig/mwparserfromhell/issues/199>`_,
`#204 <https://github.com/earwig/mwparserfromhell/pull/204>`_)
- Fixed signals getting stuck inside the C tokenizer until parsing finishes,
in pathological cases.
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_)
- Fixed ``<wbr>`` not being considered a single-only tag.
(`#200 <https://github.com/earwig/mwparserfromhell/pull/200>`_)
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions.
(`#208 <https://github.com/earwig/mwparserfromhell/issues/208>`_)
- Cleaned up some minor documentation issues.
(`#207 <https://github.com/earwig/mwparserfromhell/pull/207>`_)

v0.5.1
------

`Released March 03, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_
`Released March 3, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...v0.5.1>`__):

- Improved behavior when adding parameters to templates (via


+ 1
- 1
docs/integration.rst Просмотреть файл

@@ -32,7 +32,7 @@ If you're not using a library, you can parse any page using the following code
"rvprop": "content", "format": "json", "titles": title}
raw = urlopen(API_URL, urlencode(data).encode()).read()
res = json.loads(raw)
text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
return mwparserfromhell.parse(text)

.. _EarwigBot: https://github.com/earwig/earwigbot


+ 1
- 1
mwparserfromhell/__init__.py Просмотреть файл

@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.5.1"
__version__ = "0.5.2"
__email__ = "ben.kurtovic@gmail.com"

from . import (compat, definitions, nodes, parser, smart_list, string_mixin,


+ 0
- 2
mwparserfromhell/compat.py Просмотреть файл

@@ -10,9 +10,7 @@ meant to be imported directly from within the parser's modules.

import sys

py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6)
py3k = (sys.version_info[0] == 3)
py32 = py3k and (sys.version_info[1] == 2)

if py3k:
bytes = bytes


+ 2
- 2
mwparserfromhell/definitions.py Просмотреть файл

@@ -56,8 +56,8 @@ INVISIBLE_TAGS = [
"section", "templatedata", "timeline"
]

# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"]
# [mediawiki/core.git]/includes/Sanitizer.php @ 065bec63ea
SINGLE_ONLY = ["br", "hr", "meta", "link", "img", "wbr"]
SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"]

MARKUP_TO_HTML = {


+ 2
- 2
mwparserfromhell/nodes/extras/attribute.py Просмотреть файл

@@ -40,7 +40,7 @@ class Attribute(StringMixIn):
pad_before_eq="", pad_after_eq="", check_quotes=True):
super(Attribute, self).__init__()
if check_quotes and not quotes and self._value_needs_quotes(value):
raise ValueError("given value {0!r} requires quotes".format(value))
raise ValueError("given value {!r} requires quotes".format(value))
self._name = name
self._value = value
self._quotes = quotes
@@ -79,7 +79,7 @@ class Attribute(StringMixIn):
"""Coerce a quote type into an acceptable value, or raise an error."""
orig, quotes = quotes, str(quotes) if quotes else None
if quotes not in [None, '"', "'"]:
raise ValueError("{0!r} is not a valid quote type".format(orig))
raise ValueError("{!r} is not a valid quote type".format(orig))
return quotes

@property


+ 1
- 1
mwparserfromhell/nodes/extras/parameter.py Просмотреть файл

@@ -41,7 +41,7 @@ class Parameter(StringMixIn):
def __init__(self, name, value, showkey=True):
super(Parameter, self).__init__()
if not showkey and not self.can_hide_key(name):
raise ValueError("key {0!r} cannot be hidden".format(name))
raise ValueError("key {!r} cannot be hidden".format(name))
self._name = name
self._value = value
self._showkey = showkey


+ 3
- 3
mwparserfromhell/nodes/html_entity.py Просмотреть файл

@@ -53,10 +53,10 @@ class HTMLEntity(Node):

def __unicode__(self):
if self.named:
return "&{0};".format(self.value)
return "&{};".format(self.value)
if self.hexadecimal:
return "&#{0}{1};".format(self.hex_char, self.value)
return "&#{0};".format(self.value)
return "&#{}{};".format(self.hex_char, self.value)
return "&#{};".format(self.value)

def __strip__(self, **kwargs):
if kwargs.get("normalize"):


+ 1
- 1
mwparserfromhell/parser/__init__.py Просмотреть файл

@@ -35,7 +35,7 @@ class ParserError(Exception):
can happen. Its appearance indicates a bug.
"""
def __init__(self, extra):
msg = "This is a bug and should be reported. Info: {0}.".format(extra)
msg = "This is a bug and should be reported. Info: {}.".format(extra)
super(ParserError, self).__init__(msg)




+ 3
- 2
mwparserfromhell/parser/ctokenizer/definitions.c Просмотреть файл

@@ -45,11 +45,12 @@ static const char* PARSER_BLACKLIST[] = {
};

static const char* SINGLE[] = {
"br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL
"br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr",
"wbr", NULL
};

static const char* SINGLE_ONLY[] = {
"br", "hr", "meta", "link", "img", NULL
"br", "hr", "meta", "link", "img", "wbr", NULL
};

/*


+ 2
- 0
mwparserfromhell/parser/ctokenizer/tok_parse.c Просмотреть файл

@@ -2603,6 +2603,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
}
if (!this)
return Tokenizer_handle_end(self, this_context);
if (PyErr_CheckSignals())
return NULL;
next = Tokenizer_read(self, 1);
last = Tokenizer_read_backwards(self, 1);
if (this == next && next == '{') {


+ 1
- 1
mwparserfromhell/parser/ctokenizer/tokenizer.c Просмотреть файл

@@ -207,7 +207,7 @@ static int load_entities(void)
if (!deflist)
return -1;
Py_DECREF(defmap);
numdefs = (unsigned) PyList_GET_SIZE(defmap);
numdefs = (unsigned) PyList_GET_SIZE(deflist);
entitydefs = calloc(numdefs + 1, sizeof(char*));
if (!entitydefs)
return -1;


+ 1
- 1
mwparserfromhell/parser/tokens.py Просмотреть файл

@@ -44,7 +44,7 @@ class Token(dict):
args.append(key + "=" + repr(value[:97] + "..."))
else:
args.append(key + "=" + repr(value))
return "{0}({1})".format(type(self).__name__, ", ".join(args))
return "{}({})".format(type(self).__name__, ", ".join(args))

def __eq__(self, other):
return isinstance(other, type(self)) and dict.__eq__(self, other)


+ 2
- 11
mwparserfromhell/string_mixin.py Просмотреть файл

@@ -28,7 +28,7 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner.
from __future__ import unicode_literals
from sys import getdefaultencoding

from .compat import bytes, py26, py3k, str
from .compat import bytes, py3k, str

__all__ = ["StringMixIn"]

@@ -109,21 +109,12 @@ class StringMixIn(object):

def __getattr__(self, attr):
if not hasattr(str, attr):
raise AttributeError("{0!r} object has no attribute {1!r}".format(
raise AttributeError("{!r} object has no attribute {!r}".format(
type(self).__name__, attr))
return getattr(self.__unicode__(), attr)

if py3k:
maketrans = str.maketrans # Static method can't rely on __getattr__

if py26:
@inheritdoc
def encode(self, encoding=None, errors=None):
if encoding is None:
encoding = getdefaultencoding()
if errors is not None:
return self.__unicode__().encode(encoding, errors)
return self.__unicode__().encode(encoding)


del inheritdoc

+ 2
- 2
scripts/memtest.py Просмотреть файл

@@ -80,7 +80,7 @@ class MemoryTest(object):
raw = raw.encode("raw_unicode_escape")
data["input"] = raw.decode("unicode_escape")
number = str(counter).zfill(digits)
fname = "test_{0}{1}_{2}".format(name, number, data["name"])
fname = "test_{}{}_{}".format(name, number, data["name"])
self._tests.append((fname, data["input"]))
counter += 1

@@ -117,7 +117,7 @@ class MemoryTest(object):
tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)"
sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt))
else:
sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET))
sys.stdout.write("{}OK{}".format(Color.GREEN, Color.RESET))

def run(self):
"""Run the memory test suite."""


+ 3
- 4
scripts/release.sh Просмотреть файл

@@ -9,7 +9,7 @@ fi

VERSION=$1
SCRIPT_DIR=$(dirname "$0")
RELEASE_DATE=$(date +"%B %d, %Y")
RELEASE_DATE=$(date +"%B %-d, %Y")

check_git() {
if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then
@@ -76,9 +76,8 @@ do_git_stuff() {
}

upload_to_pypi() {
echo -n "PyPI: uploading source tarball and docs..."
echo -n "PyPI: uploading source tarball..."
python setup.py -q register sdist upload -s
python setup.py -q upload_docs
echo " done."
}

@@ -88,7 +87,7 @@ post_release() {
echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION"
echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell"
echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell"
echo "*** Verify: https://mwparserfromhell.readthedocs.org"
echo "*** Verify: https://mwparserfromhell.readthedocs.io"
echo "*** Press enter to sanity-check the release."
read
}


+ 8
- 11
setup.py Просмотреть файл

@@ -27,15 +27,15 @@ from glob import glob
from os import environ
import sys

if ((sys.version_info[0] == 2 and sys.version_info[1] < 6) or
(sys.version_info[1] == 3 and sys.version_info[1] < 2)):
raise RuntimeError("mwparserfromhell needs Python 2.6+ or 3.2+")
if ((sys.version_info[0] == 2 and sys.version_info[1] < 7) or
(sys.version_info[1] == 3 and sys.version_info[1] < 4)):
raise RuntimeError("mwparserfromhell needs Python 2.7 or 3.4+")

from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext

from mwparserfromhell import __version__
from mwparserfromhell.compat import py26, py3k
from mwparserfromhell.compat import py3k

with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp:
long_docs = fp.read()
@@ -76,21 +76,20 @@ if fallback:

tokenizer = Extension("mwparserfromhell.parser._tokenizer",
sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")),
depends=glob("mwparserfromhell/parser/ctokenizer/*.h"))
depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h")))

setup(
name = "mwparserfromhell",
packages = find_packages(exclude=("tests",)),
ext_modules = [tokenizer] if use_extension else [],
tests_require = ["unittest2"] if py26 else [],
test_suite = "tests.discover",
test_suite = "tests",
version = __version__,
author = "Ben Kurtovic",
author_email = "ben.kurtovic@gmail.com",
url = "https://github.com/earwig/mwparserfromhell",
description = "MWParserFromHell is a parser for MediaWiki wikicode.",
long_description = long_docs,
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__),
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__),
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing",
license = "MIT License",
classifiers = [
@@ -99,11 +98,9 @@ setup(
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",


+ 3
- 3
tests/_test_tokenizer.py Просмотреть файл

@@ -109,7 +109,7 @@ class TokenizerTestCase(object):
print(error.format(filename))
continue
if data["input"] is None or data["output"] is None:
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
error = "Test '{}' in '{}' was ignored because it lacked an input or an output"
print(error.format(data["name"], filename))
continue

@@ -118,7 +118,7 @@ class TokenizerTestCase(object):
if restrict and data["name"] != restrict:
continue

fname = "test_{0}{1}_{2}".format(name, number, data["name"])
fname = "test_{}{}_{}".format(name, number, data["name"])
meth = cls._build_test_method(fname, data)
setattr(cls, fname, meth)

@@ -126,7 +126,7 @@ class TokenizerTestCase(object):
def build(cls):
"""Load and install all tests from the 'tokenizer' directory."""
def load_file(filename, restrict=None):
with codecs.open(filename, "rU", encoding="utf8") as fp:
with codecs.open(filename, "r", encoding="utf8") as fp:
text = fp.read()
name = path.split(filename)[1][:-len(extension)]
cls._load_tests(filename, name, text, restrict)


+ 1
- 5
tests/_test_tree_equality.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
from unittest2 import TestCase
except ImportError:
from unittest import TestCase
from unittest import TestCase

from mwparserfromhell.compat import range
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,


+ 0
- 24
tests/discover.py Просмотреть файл

@@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-

"""
Discover tests using ``unittest2`` for Python 2.6.

It appears the default distutils test suite doesn't play nice with
``setUpClass`` thereby making some tests fail. Using ``unittest2`` to load
tests seems to work around that issue.

http://stackoverflow.com/a/17004409/753501
"""

import os.path

from mwparserfromhell.compat import py26

if py26:
import unittest2 as unittest
else:
import unittest

def additional_tests():
project_root = os.path.split(os.path.dirname(__file__))[0]
return unittest.defaultTestLoader.discover(project_root)

+ 1
- 5
tests/test_argument.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Argument, Text


+ 1
- 5
tests/test_attribute.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Template


+ 1
- 5
tests/test_builder.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import py3k
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading,


+ 1
- 5
tests/test_comment.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Comment


+ 1
- 5
tests/test_ctokenizer.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

try:
from mwparserfromhell.parser._tokenizer import CTokenizer


+ 1
- 5
tests/test_docs.py Просмотреть файл

@@ -23,11 +23,7 @@
from __future__ import print_function, unicode_literals
import json
import os

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

import mwparserfromhell
from mwparserfromhell.compat import py3k, str


+ 1
- 5
tests/test_external_link.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import ExternalLink, Text


+ 1
- 5
tests/test_heading.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Heading, Text


+ 1
- 5
tests/test_html_entity.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity


+ 1
- 5
tests/test_parameter.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text


+ 1
- 5
tests/test_parser.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell import parser
from mwparserfromhell.compat import range


+ 1
- 5
tests/test_pytokenizer.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.parser.tokenizer import Tokenizer



+ 1
- 5
tests/test_roundtripping.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from ._test_tokenizer import TokenizerTestCase



+ 29
- 33
tests/test_smart_list.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import py3k, range
from mwparserfromhell.smart_list import SmartList, _ListProxy
@@ -139,36 +135,36 @@ class TestSmartList(unittest.TestCase):
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1))
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1))

self.assertTrue(list1 < list3)
self.assertTrue(list1 <= list3)
self.assertFalse(list1 == list3)
self.assertTrue(list1 != list3)
self.assertFalse(list1 > list3)
self.assertFalse(list1 >= list3)
self.assertLess(list1, list3)
self.assertLessEqual(list1, list3)
self.assertNotEqual(list1, list3)
self.assertNotEqual(list1, list3)
self.assertLessEqual(list1, list3)
self.assertLess(list1, list3)

other1 = [0, 2, 3, 4]
self.assertTrue(list1 < other1)
self.assertTrue(list1 <= other1)
self.assertFalse(list1 == other1)
self.assertTrue(list1 != other1)
self.assertFalse(list1 > other1)
self.assertFalse(list1 >= other1)
self.assertLess(list1, other1)
self.assertLessEqual(list1, other1)
self.assertNotEqual(list1, other1)
self.assertNotEqual(list1, other1)
self.assertLessEqual(list1, other1)
self.assertLess(list1, other1)

other2 = [0, 0, 1, 2]
self.assertFalse(list1 < other2)
self.assertFalse(list1 <= other2)
self.assertFalse(list1 == other2)
self.assertTrue(list1 != other2)
self.assertTrue(list1 > other2)
self.assertTrue(list1 >= other2)
self.assertGreaterEqual(list1, other2)
self.assertGreater(list1, other2)
self.assertNotEqual(list1, other2)
self.assertNotEqual(list1, other2)
self.assertGreater(list1, other2)
self.assertGreaterEqual(list1, other2)

other3 = [0, 1, 2, 3, "one", "two"]
self.assertFalse(list1 < other3)
self.assertTrue(list1 <= other3)
self.assertTrue(list1 == other3)
self.assertFalse(list1 != other3)
self.assertFalse(list1 > other3)
self.assertTrue(list1 >= other3)
self.assertGreaterEqual(list1, other3)
self.assertLessEqual(list1, other3)
self.assertEqual(list1, other3)
self.assertEqual(list1, other3)
self.assertLessEqual(list1, other3)
self.assertGreaterEqual(list1, other3)

self.assertTrue(bool(list1))
self.assertFalse(bool(list2))
@@ -198,10 +194,10 @@ class TestSmartList(unittest.TestCase):
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
self.assertEqual([], list(reversed(list2)))

self.assertTrue("one" in list1)
self.assertTrue(3 in list1)
self.assertFalse(10 in list1)
self.assertFalse(0 in list2)
self.assertIn("one", list1)
self.assertIn(3, list1)
self.assertNotIn(10, list1)
self.assertNotIn(0, list2)

self.assertEqual([], list2 * 5)
self.assertEqual([], 5 * list2)


+ 40
- 46
tests/test_string_mixin.py Просмотреть файл

@@ -23,13 +23,9 @@
from __future__ import unicode_literals
from sys import getdefaultencoding
from types import GeneratorType
import unittest

try:
import unittest2 as unittest
except ImportError:
import unittest

from mwparserfromhell.compat import bytes, py3k, py32, range, str
from mwparserfromhell.compat import bytes, py3k, range, str
from mwparserfromhell.string_mixin import StringMixIn

class _FakeString(StringMixIn):
@@ -54,9 +50,7 @@ class TestStringMixIn(unittest.TestCase):
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip",
"swapcase", "title", "translate", "upper", "zfill"]
if py3k:
if not py32:
methods.append("casefold")
methods.extend(["format_map", "isidentifier", "isprintable",
methods.extend(["casefold", "format_map", "isidentifier", "isprintable",
"maketrans"])
else:
methods.append("decode")
@@ -90,33 +84,33 @@ class TestStringMixIn(unittest.TestCase):
str4 = "this is a fake string"
str5 = "fake string, this is"

self.assertFalse(str1 > str2)
self.assertTrue(str1 >= str2)
self.assertTrue(str1 == str2)
self.assertFalse(str1 != str2)
self.assertFalse(str1 < str2)
self.assertTrue(str1 <= str2)
self.assertTrue(str1 > str3)
self.assertTrue(str1 >= str3)
self.assertFalse(str1 == str3)
self.assertTrue(str1 != str3)
self.assertFalse(str1 < str3)
self.assertFalse(str1 <= str3)
self.assertFalse(str1 > str4)
self.assertTrue(str1 >= str4)
self.assertTrue(str1 == str4)
self.assertFalse(str1 != str4)
self.assertFalse(str1 < str4)
self.assertTrue(str1 <= str4)
self.assertFalse(str5 > str1)
self.assertFalse(str5 >= str1)
self.assertFalse(str5 == str1)
self.assertTrue(str5 != str1)
self.assertTrue(str5 < str1)
self.assertTrue(str5 <= str1)
self.assertLessEqual(str1, str2)
self.assertGreaterEqual(str1, str2)
self.assertEqual(str1, str2)
self.assertEqual(str1, str2)
self.assertGreaterEqual(str1, str2)
self.assertLessEqual(str1, str2)
self.assertGreater(str1, str3)
self.assertGreaterEqual(str1, str3)
self.assertNotEqual(str1, str3)
self.assertNotEqual(str1, str3)
self.assertGreaterEqual(str1, str3)
self.assertGreater(str1, str3)
self.assertLessEqual(str1, str4)
self.assertGreaterEqual(str1, str4)
self.assertEqual(str1, str4)
self.assertEqual(str1, str4)
self.assertGreaterEqual(str1, str4)
self.assertLessEqual(str1, str4)
self.assertLessEqual(str5, str1)
self.assertLess(str5, str1)
self.assertNotEqual(str5, str1)
self.assertNotEqual(str5, str1)
self.assertLess(str5, str1)
self.assertLessEqual(str5, str1)

def test_other_magics(self):
"""test other magically implemented features, like len() and iter()"""
@@ -161,13 +155,13 @@ class TestStringMixIn(unittest.TestCase):
self.assertRaises(IndexError, lambda: str1[11])
self.assertRaises(IndexError, lambda: str2[0])

self.assertTrue("k" in str1)
self.assertTrue("fake" in str1)
self.assertTrue("str" in str1)
self.assertTrue("" in str1)
self.assertTrue("" in str2)
self.assertFalse("real" in str1)
self.assertFalse("s" in str2)
self.assertIn("k", str1)
self.assertIn("fake", str1)
self.assertIn("str", str1)
self.assertIn("", str1)
self.assertIn("", str2)
self.assertNotIn("real", str1)
self.assertNotIn("s", str2)

def test_other_methods(self):
"""test the remaining non-magic methods of StringMixIn"""
@@ -329,7 +323,7 @@ class TestStringMixIn(unittest.TestCase):
self.assertEqual("", str15.lower())
self.assertEqual("foobar", str16.lower())
self.assertEqual("ß", str22.lower())
if py3k and not py32:
if py3k:
self.assertEqual("", str15.casefold())
self.assertEqual("foobar", str16.casefold())
self.assertEqual("ss", str22.casefold())
@@ -378,7 +372,7 @@ class TestStringMixIn(unittest.TestCase):
self.assertEqual(actual, str25.rsplit(None, 3))
actual = [" this is a sentence with", "", "whitespace", ""]
self.assertEqual(actual, str25.rsplit(" ", 3))
if py3k and not py32:
if py3k:
actual = [" this is a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.rsplit(maxsplit=3))

@@ -396,7 +390,7 @@ class TestStringMixIn(unittest.TestCase):
self.assertEqual(actual, str25.split(None, 3))
actual = ["", "", "", "this is a sentence with whitespace "]
self.assertEqual(actual, str25.split(" ", 3))
if py3k and not py32:
if py3k:
actual = ["this", "is", "a", "sentence with whitespace "]
self.assertEqual(actual, str25.split(maxsplit=3))



+ 1
- 5
tests/test_tag.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Tag, Template, Text


+ 1
- 5
tests/test_template.py Просмотреть файл

@@ -22,11 +22,7 @@

from __future__ import unicode_literals
from difflib import unified_diff

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity, Template, Text


+ 1
- 5
tests/test_text.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text


+ 1
- 5
tests/test_tokens.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens


+ 1
- 5
tests/test_utils.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.utils import parse_anything


+ 1
- 5
tests/test_wikicode.py Просмотреть файл

@@ -24,11 +24,7 @@ from __future__ import unicode_literals
from functools import partial
import re
from types import GeneratorType

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import py3k, str
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,


+ 1
- 5
tests/test_wikilink.py Просмотреть файл

@@ -21,11 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals

try:
import unittest2 as unittest
except ImportError:
import unittest
import unittest

from mwparserfromhell.compat import str
from mwparserfromhell.nodes import Text, Wikilink


Загрузка…
Отмена
Сохранить