@@ -6,4 +6,3 @@ partial_branches = | |||
pragma: no branch | |||
if py3k: | |||
if not py3k: | |||
if py26: |
@@ -12,3 +12,4 @@ dist | |||
docs/_build | |||
scripts/*.log | |||
htmlcov/ | |||
.idea/ |
@@ -1,20 +1,16 @@ | |||
dist: xenial | |||
language: python | |||
python: | |||
- 2.6 | |||
- 2.7 | |||
- 3.2 | |||
- 3.3 | |||
- 3.4 | |||
- 3.5 | |||
- 3.6 | |||
- nightly | |||
sudo: false | |||
- 3.7 | |||
- 3.8 | |||
install: | |||
- if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]]; then pip install coverage==3.7.1; fi | |||
- pip install coveralls | |||
- python setup.py build | |||
- python setup.py develop | |||
script: | |||
- coverage run --source=mwparserfromhell setup.py -q test | |||
- coverage run --source=mwparserfromhell -m unittest discover | |||
after_success: | |||
- coveralls | |||
env: | |||
@@ -1,6 +1,30 @@ | |||
v0.6 (unreleased): | |||
- ... | |||
- Added support for Python 3.8. | |||
- Updated Wikicode.matches() to recognize underscores as being equivalent | |||
to spaces. (#216) | |||
- Fixed a rare parsing bug involving deeply nested style tags. (#224) | |||
v0.5.4 (released May 15, 2019): | |||
- Fixed an unlikely crash in the C tokenizer when interrupted while parsing | |||
a heading. | |||
v0.5.3 (released March 30, 2019): | |||
- Fixed manual construction of Node objects, previously unsupported. (#214) | |||
- Fixed Wikicode transformation methods (replace(), remove(), etc.) when passed | |||
an empty section as an argument. (#212) | |||
- Fixed the parser getting stuck inside malformed tables. (#206) | |||
v0.5.2 (released November 1, 2018): | |||
- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204) | |||
- Fixed signals getting stuck inside the C tokenizer until parsing finishes, | |||
in pathological cases. (#206) | |||
- Fixed <wbr> not being considered a single-only tag. (#200) | |||
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. (#208) | |||
- Cleaned up some minor documentation issues. (#207) | |||
v0.5.1 (released March 3, 2018): | |||
@@ -1,4 +1,4 @@ | |||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
@@ -3,7 +3,7 @@ mwparserfromhell | |||
.. image:: https://img.shields.io/travis/earwig/mwparserfromhell/develop.svg | |||
:alt: Build Status | |||
:target: http://travis-ci.org/earwig/mwparserfromhell | |||
:target: https://travis-ci.org/earwig/mwparserfromhell | |||
.. image:: https://img.shields.io/coveralls/earwig/mwparserfromhell/develop.svg | |||
:alt: Coverage Status | |||
@@ -11,7 +11,7 @@ mwparserfromhell | |||
**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package | |||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | |||
wikicode. It supports Python 2 and Python 3. | |||
wikicode. It supports Python 3.4+. | |||
Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. | |||
Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. | |||
@@ -30,88 +30,86 @@ Alternatively, get the latest development version:: | |||
python setup.py install | |||
You can run the comprehensive unit testing suite with | |||
``python setup.py test -q``. | |||
``python -m unittest discover``. | |||
Usage | |||
----- | |||
Normal usage is rather straightforward (where ``text`` is page text):: | |||
Normal usage is rather straightforward (where ``text`` is page text): | |||
>>> import mwparserfromhell | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
>>> import mwparserfromhell | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an | |||
ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods. | |||
For example:: | |||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
>>> print(wikicode) | |||
I has a template! {{foo|bar|baz|eggs=spam}} See it? | |||
>>> templates = wikicode.filter_templates() | |||
>>> print(templates) | |||
['{{foo|bar|baz|eggs=spam}}'] | |||
>>> template = templates[0] | |||
>>> print(template.name) | |||
foo | |||
>>> print(template.params) | |||
['bar', 'baz', 'eggs=spam'] | |||
>>> print(template.get(1).value) | |||
bar | |||
>>> print(template.get("eggs").value) | |||
spam | |||
Since nodes can contain other nodes, getting nested templates is trivial:: | |||
>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
>>> mwparserfromhell.parse(text).filter_templates() | |||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | |||
ordinary ``str`` object with some extra methods. | |||
For example: | |||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
>>> print(wikicode) | |||
I has a template! {{foo|bar|baz|eggs=spam}} See it? | |||
>>> templates = wikicode.filter_templates() | |||
>>> print(templates) | |||
['{{foo|bar|baz|eggs=spam}}'] | |||
>>> template = templates[0] | |||
>>> print(template.name) | |||
foo | |||
>>> print(template.params) | |||
['bar', 'baz', 'eggs=spam'] | |||
>>> print(template.get(1).value) | |||
bar | |||
>>> print(template.get("eggs").value) | |||
spam | |||
Since nodes can contain other nodes, getting nested templates is trivial: | |||
>>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
>>> mwparserfromhell.parse(text).filter_templates() | |||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | |||
You can also pass ``recursive=False`` to ``filter_templates()`` and explore | |||
templates manually. This is possible because nodes can contain additional | |||
``Wikicode`` objects:: | |||
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
>>> print(code.filter_templates(recursive=False)) | |||
['{{foo|this {{includes a|template}}}}'] | |||
>>> foo = code.filter_templates(recursive=False)[0] | |||
>>> print(foo.get(1).value) | |||
this {{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0]) | |||
{{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0].get(1).value) | |||
template | |||
``Wikicode`` objects: | |||
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
>>> print(code.filter_templates(recursive=False)) | |||
['{{foo|this {{includes a|template}}}}'] | |||
>>> foo = code.filter_templates(recursive=False)[0] | |||
>>> print(foo.get(1).value) | |||
this {{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0]) | |||
{{includes a|template}} | |||
>>> print(foo.get(1).value.filter_templates()[0].get(1).value) | |||
template | |||
Templates can be easily modified to add, remove, or alter params. ``Wikicode`` | |||
objects can be treated like lists, with ``append()``, ``insert()``, | |||
``remove()``, ``replace()``, and more. They also have a ``matches()`` method | |||
for comparing page or template names, which takes care of capitalization and | |||
whitespace:: | |||
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
>>> code = mwparserfromhell.parse(text) | |||
>>> for template in code.filter_templates(): | |||
... if template.name.matches("Cleanup") and not template.has("date"): | |||
... template.add("date", "July 2012") | |||
... | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | |||
>>> code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> print(code.filter_templates()) | |||
['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | |||
whitespace: | |||
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
>>> code = mwparserfromhell.parse(text) | |||
>>> for template in code.filter_templates(): | |||
... if template.name.matches("Cleanup") and not template.has("date"): | |||
... template.add("date", "July 2012") | |||
... | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | |||
>>> code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
>>> print(code) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> print(code.filter_templates()) | |||
['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | |||
You can then convert ``code`` back into a regular ``str`` object (for | |||
saving the page!) by calling ``str()`` on it:: | |||
saving the page!) by calling ``str()`` on it: | |||
>>> text = str(code) | |||
>>> print(text) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> text == code | |||
True | |||
Likewise, use ``unicode(code)`` in Python 2. | |||
>>> text = str(code) | |||
>>> print(text) | |||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | |||
>>> text == code | |||
True | |||
Limitations | |||
----------- | |||
@@ -164,7 +162,9 @@ Integration | |||
``Page`` objects have a ``parse`` method that essentially calls | |||
``mwparserfromhell.parse()`` on ``page.get()``. | |||
If you're using Pywikibot_, your code might look like this:: | |||
If you're using Pywikibot_, your code might look like this: | |||
.. code-block:: python | |||
import mwparserfromhell | |||
import pywikibot | |||
@@ -175,32 +175,44 @@ If you're using Pywikibot_, your code might look like this:: | |||
text = page.get() | |||
return mwparserfromhell.parse(text) | |||
If you're not using a library, you can parse any page using the following | |||
Python 3 code (via the API_):: | |||
If you're not using a library, you can parse any page with the following | |||
Python 3 code (using the API_ and the requests_ library): | |||
.. code-block:: python | |||
import json | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
import mwparserfromhell | |||
import requests | |||
API_URL = "https://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
params = { | |||
"action": "query", | |||
"prop": "revisions", | |||
"rvprop": "content", | |||
"rvslots": "main", | |||
"rvlimit": 1, | |||
"titles": title, | |||
"format": "json", | |||
"formatversion": "2", | |||
} | |||
headers = {"User-Agent": "My-Bot-Name/1.0"} | |||
req = requests.get(API_URL, headers=headers, params=params) | |||
res = req.json() | |||
revision = res["query"]["pages"][0]["revisions"][0] | |||
text = revision["slots"]["main"]["content"] | |||
return mwparserfromhell.parse(text) | |||
.. _MediaWiki: http://mediawiki.org | |||
.. _ReadTheDocs: http://mwparserfromhell.readthedocs.io | |||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||
.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm | |||
.. _MediaWiki: https://www.mediawiki.org | |||
.. _ReadTheDocs: https://mwparserfromhell.readthedocs.io | |||
.. _Earwig: https://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: https://en.wikipedia.org/wiki/User:%CE%A3 | |||
.. _Legoktm: https://en.wikipedia.org/wiki/User:Legoktm | |||
.. _GitHub: https://github.com/earwig/mwparserfromhell | |||
.. _Python Package Index: http://pypi.python.org | |||
.. _get pip: http://pypi.python.org/pypi/pip | |||
.. _Python Package Index: https://pypi.org/ | |||
.. _get pip: https://pypi.org/project/pip/ | |||
.. _Word-ending links: https://www.mediawiki.org/wiki/Help:Links#linktrail | |||
.. _EarwigBot: https://github.com/earwig/earwigbot | |||
.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot | |||
.. _API: http://mediawiki.org/wiki/API | |||
.. _API: https://www.mediawiki.org/wiki/API:Main_page | |||
.. _requests: https://2.python-requests.org |
@@ -13,29 +13,15 @@ environment: | |||
global: | |||
# See: http://stackoverflow.com/a/13751649/163740 | |||
WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd" | |||
PIP: "%WRAPPER% %PYTHON%\\Scripts\\pip.exe" | |||
SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension" | |||
PYEXE: "%WRAPPER% %PYTHON%\\python.exe" | |||
SETUPPY: "%PYEXE% setup.py --with-extension" | |||
PIP: "%PYEXE% -m pip" | |||
TWINE: "%PYEXE% -m twine" | |||
PYPI_USERNAME: "earwigbot" | |||
PYPI_PASSWORD: | |||
secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+ | |||
matrix: | |||
- PYTHON: "C:\\Python27" | |||
PYTHON_VERSION: "2.7" | |||
PYTHON_ARCH: "32" | |||
- PYTHON: "C:\\Python27-x64" | |||
PYTHON_VERSION: "2.7" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python33" | |||
PYTHON_VERSION: "3.3" | |||
PYTHON_ARCH: "32" | |||
- PYTHON: "C:\\Python33-x64" | |||
PYTHON_VERSION: "3.3" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python34" | |||
PYTHON_VERSION: "3.4" | |||
PYTHON_ARCH: "32" | |||
@@ -60,21 +46,38 @@ environment: | |||
PYTHON_VERSION: "3.6" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python37" | |||
PYTHON_VERSION: "3.7" | |||
PYTHON_ARCH: "32" | |||
- PYTHON: "C:\\Python37-x64" | |||
PYTHON_VERSION: "3.7" | |||
PYTHON_ARCH: "64" | |||
- PYTHON: "C:\\Python38" | |||
PYTHON_VERSION: "3.8" | |||
PYTHON_ARCH: "32" | |||
- PYTHON: "C:\\Python38-x64" | |||
PYTHON_VERSION: "3.8" | |||
PYTHON_ARCH: "64" | |||
install: | |||
- "%PIP% install --disable-pip-version-check --user --upgrade pip" | |||
- "%PIP% install wheel twine" | |||
build_script: | |||
- "%SETUPPY% build" | |||
- "%SETUPPY% develop --user" | |||
test_script: | |||
- "%SETUPPY% -q test" | |||
- "%PYEXE% -m unittest discover" | |||
after_test: | |||
- "%SETUPPY% bdist_wheel" | |||
on_success: | |||
- "IF %APPVEYOR_REPO_BRANCH%==master %WRAPPER% %PYTHON%\\python -m twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" | |||
- "IF %APPVEYOR_REPO_BRANCH%==master %TWINE% upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%" | |||
artifacts: | |||
- path: dist\* | |||
@@ -5,9 +5,55 @@ v0.6 | |||
---- | |||
Unreleased | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.1...develop>`__): | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.4...develop>`__): | |||
- ... | |||
- Added support for Python 3.8. | |||
- Updated Wikicode.matches() to recognize underscores as being equivalent | |||
to spaces. (`#216 <https://github.com/earwig/mwparserfromhell/issues/216>`_) | |||
- Fixed a rare parsing bug involving deeply nested style tags. | |||
(`#224 <https://github.com/earwig/mwparserfromhell/issues/224>`_) | |||
v0.5.4 | |||
------ | |||
`Released May 15, 2019 <https://github.com/earwig/mwparserfromhell/tree/v0.5.4>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.3...v0.5.4>`__): | |||
- Fixed an unlikely crash in the C tokenizer when interrupted while parsing | |||
a heading. | |||
v0.5.3 | |||
------ | |||
`Released March 30, 2019 <https://github.com/earwig/mwparserfromhell/tree/v0.5.3>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.2...v0.5.3>`__): | |||
- Fixed manual construction of Node objects, previously unsupported. | |||
(`#214 <https://github.com/earwig/mwparserfromhell/issues/214>`_) | |||
- Fixed :class:`.Wikicode` transformation methods (:meth:`.Wikicode.replace`, | |||
:meth:`.Wikicode.remove`, etc.) when passed an empty section as an argument. | |||
(`#212 <https://github.com/earwig/mwparserfromhell/issues/212>`_) | |||
- Fixed the parser getting stuck inside malformed tables. | |||
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_) | |||
v0.5.2 | |||
------ | |||
`Released November 1, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.2>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.1...v0.5.2>`__): | |||
- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. | |||
(`#199 <https://github.com/earwig/mwparserfromhell/issues/199>`_, | |||
`#204 <https://github.com/earwig/mwparserfromhell/pull/204>`_) | |||
- Fixed signals getting stuck inside the C tokenizer until parsing finishes, | |||
in pathological cases. | |||
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_) | |||
- Fixed `<wbr>` not being considered a single-only tag. | |||
(`#200 <https://github.com/earwig/mwparserfromhell/pull/200>`_) | |||
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. | |||
(`#208 <https://github.com/earwig/mwparserfromhell/issues/208>`_) | |||
- Cleaned up some minor documentation issues. | |||
(`#207 <https://github.com/earwig/mwparserfromhell/pull/207>`_) | |||
v0.5.1 | |||
------ | |||
@@ -42,7 +42,7 @@ master_doc = 'index' | |||
# General information about the project. | |||
project = u'mwparserfromhell' | |||
copyright = u'2012–2018 Ben Kurtovic' | |||
copyright = u'2012–2019 Ben Kurtovic' | |||
# The version info for the project you're documenting, acts as replacement for | |||
# |version| and |release|, also used in various other places throughout the | |||
@@ -3,15 +3,15 @@ MWParserFromHell v\ |version| Documentation | |||
:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package | |||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | |||
wikicode. It supports Python 2 and Python 3. | |||
wikicode. It supports Python 3.4+. | |||
Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. | |||
Development occurs on GitHub_. | |||
.. _MediaWiki: http://mediawiki.org | |||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | |||
.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm | |||
.. _MediaWiki: https://www.mediawiki.org | |||
.. _Earwig: https://en.wikipedia.org/wiki/User:The_Earwig | |||
.. _Σ: https://en.wikipedia.org/wiki/User:%CE%A3 | |||
.. _Legoktm: https://en.wikipedia.org/wiki/User:Legoktm | |||
.. _GitHub: https://github.com/earwig/mwparserfromhell | |||
Installation | |||
@@ -28,10 +28,10 @@ Alternatively, get the latest development version:: | |||
python setup.py install | |||
You can run the comprehensive unit testing suite with | |||
``python setup.py test -q``. | |||
``python -m unittest discover``. | |||
.. _Python Package Index: http://pypi.python.org | |||
.. _get pip: http://pypi.python.org/pypi/pip | |||
.. _Python Package Index: https://pypi.org/ | |||
.. _get pip: https://pypi.org/project/pip/ | |||
Contents | |||
-------- | |||
@@ -7,7 +7,7 @@ Integration | |||
:func:`mwparserfromhell.parse() <mwparserfromhell.__init__.parse>` on | |||
:meth:`~earwigbot.wiki.page.Page.get`. | |||
If you're using Pywikibot_, your code might look like this:: | |||
If you're using Pywikibot_, your code might look like this: | |||
import mwparserfromhell | |||
import pywikibot | |||
@@ -18,23 +18,33 @@ If you're using Pywikibot_, your code might look like this:: | |||
text = page.get() | |||
return mwparserfromhell.parse(text) | |||
If you're not using a library, you can parse any page using the following code | |||
(via the API_):: | |||
If you're not using a library, you can parse any page with the following | |||
Python 3 code (using the API_ and the requests_ library): | |||
import json | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
import mwparserfromhell | |||
import requests | |||
API_URL = "https://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
params = { | |||
"action": "query", | |||
"prop": "revisions", | |||
"rvprop": "content", | |||
"rvslots": "main", | |||
"rvlimit": 1, | |||
"titles": title, | |||
"format": "json", | |||
"formatversion": "2", | |||
} | |||
headers = {"User-Agent": "My-Bot-Name/1.0"} | |||
req = requests.get(API_URL, headers=headers, params=params) | |||
res = req.json() | |||
revision = res["query"]["pages"][0]["revisions"][0] | |||
text = revision["slots"]["main"]["content"] | |||
return mwparserfromhell.parse(text) | |||
.. _EarwigBot: https://github.com/earwig/earwigbot | |||
.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot | |||
.. _API: http://mediawiki.org/wiki/API | |||
.. _API: https://www.mediawiki.org/wiki/API:Main_page | |||
.. _requests: https://2.python-requests.org |
@@ -7,8 +7,7 @@ Normal usage is rather straightforward (where ``text`` is page text):: | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which | |||
acts like an ordinary ``str`` object (or ``unicode`` in Python 2) with some | |||
extra methods. For example:: | |||
acts like an ordinary ``str`` object with some extra methods. For example:: | |||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
>>> wikicode = mwparserfromhell.parse(text) | |||
@@ -78,7 +77,6 @@ saving the page!) by calling :func:`str` on it:: | |||
>>> text == code | |||
True | |||
(Likewise, use :func:`unicode(code) <unicode>` in Python 2.) | |||
For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and | |||
the :mod:`list of Nodes <.nodes>`. |
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -23,16 +22,16 @@ | |||
""" | |||
`mwparserfromhell <https://github.com/earwig/mwparserfromhell>`_ (the MediaWiki | |||
Parser from Hell) is a Python package that provides an easy-to-use and | |||
outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||
outrageously powerful parser for `MediaWiki <https://www.mediawiki.org>`_ wikicode. | |||
""" | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2012-2019 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
__version__ = "0.6.dev0" | |||
__email__ = "ben.kurtovic@gmail.com" | |||
from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | |||
from . import (definitions, nodes, parser, smart_list, string_mixin, | |||
utils, wikicode) | |||
parse = utils.parse_anything |
@@ -1,29 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Implements support for both Python 2 and Python 3 by defining common types in | |||
terms of their Python 2/3 variants. For example, :class:`str` is set to | |||
:class:`unicode` on Python 2 but :class:`str` on Python 3; likewise, | |||
:class:`bytes` is :class:`str` on 2 but :class:`bytes` on 3. These types are | |||
meant to be imported directly from within the parser's modules. | |||
""" | |||
import sys | |||
py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6) | |||
py3k = (sys.version_info[0] == 3) | |||
py32 = py3k and (sys.version_info[1] == 2) | |||
if py3k: | |||
bytes = bytes | |||
str = str | |||
range = range | |||
import html.entities as htmlentities | |||
else: | |||
bytes = str | |||
str = unicode | |||
range = xrange | |||
import htmlentitydefs as htmlentities | |||
del sys |
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -28,7 +27,6 @@ When updating this file, please also update the the C tokenizer version: | |||
- mwparserfromhell/parser/ctokenizer/definitions.h | |||
""" | |||
from __future__ import unicode_literals | |||
__all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", | |||
"is_single_only", "is_scheme"] | |||
@@ -56,8 +54,8 @@ INVISIBLE_TAGS = [ | |||
"section", "templatedata", "timeline" | |||
] | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | |||
# [mediawiki/core.git]/includes/Sanitizer.php @ 065bec63ea | |||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img", "wbr"] | |||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] | |||
MARKUP_TO_HTML = { | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -29,13 +28,11 @@ the name of a :class:`.Template` is a :class:`.Wikicode` object that can | |||
contain text or more templates. | |||
""" | |||
from __future__ import unicode_literals | |||
from ..compat import str | |||
from ..string_mixin import StringMixIn | |||
__all__ = ["Node", "Text", "Argument", "Heading", "HTMLEntity", "Tag", | |||
"Template"] | |||
__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", | |||
"Node", "Tag", "Template", "Text", "Wikilink"] | |||
class Node(StringMixIn): | |||
"""Represents the base Node type, demonstrating the methods to override. | |||
@@ -56,6 +53,7 @@ class Node(StringMixIn): | |||
def __children__(self): | |||
return | |||
# pylint: disable=unreachable | |||
yield # pragma: no cover (this is a generator that yields nothing) | |||
def __strip__(self, **kwargs): | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
from ..utils import parse_anything | |||
__all__ = ["Argument"] | |||
@@ -32,9 +29,9 @@ class Argument(Node): | |||
"""Represents a template argument substitution, like ``{{{foo}}}``.""" | |||
def __init__(self, name, default=None): | |||
super(Argument, self).__init__() | |||
self._name = name | |||
self._default = default | |||
super().__init__() | |||
self.name = name | |||
self.default = default | |||
def __unicode__(self): | |||
start = "{{{" + str(self.name) | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
__all__ = ["Comment"] | |||
@@ -31,11 +28,11 @@ class Comment(Node): | |||
"""Represents a hidden HTML comment, like ``<!-- foobar -->``.""" | |||
def __init__(self, contents): | |||
super(Comment, self).__init__() | |||
self._contents = contents | |||
super().__init__() | |||
self.contents = contents | |||
def __unicode__(self): | |||
return "<!--" + str(self.contents) + "-->" | |||
return "<!--" + self.contents + "-->" | |||
@property | |||
def contents(self): | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
from ..utils import parse_anything | |||
__all__ = ["ExternalLink"] | |||
@@ -32,10 +29,10 @@ class ExternalLink(Node): | |||
"""Represents an external link, like ``[http://example.com/ Example]``.""" | |||
def __init__(self, url, title=None, brackets=True): | |||
super(ExternalLink, self).__init__() | |||
self._url = url | |||
self._title = title | |||
self._brackets = brackets | |||
super().__init__() | |||
self.url = url | |||
self.title = title | |||
self.brackets = brackets | |||
def __unicode__(self): | |||
if self.brackets: | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -21,8 +20,8 @@ | |||
# SOFTWARE. | |||
""" | |||
This package contains objects used by :class:`.Node`\ s, but that are not nodes | |||
themselves. This includes template parameters and HTML tag attributes. | |||
This package contains objects used by :class:`.Node`\\ s, but that are not | |||
nodes themselves. This includes template parameters and HTML tag attributes. | |||
""" | |||
from .attribute import Attribute | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,9 +19,7 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from ...compat import str | |||
from ...string_mixin import StringMixIn | |||
from ...utils import parse_anything | |||
@@ -37,16 +34,15 @@ class Attribute(StringMixIn): | |||
""" | |||
def __init__(self, name, value=None, quotes='"', pad_first=" ", | |||
pad_before_eq="", pad_after_eq="", check_quotes=True): | |||
super(Attribute, self).__init__() | |||
if check_quotes and not quotes and self._value_needs_quotes(value): | |||
raise ValueError("given value {0!r} requires quotes".format(value)) | |||
self._name = name | |||
self._value = value | |||
self._quotes = quotes | |||
self._pad_first = pad_first | |||
self._pad_before_eq = pad_before_eq | |||
self._pad_after_eq = pad_after_eq | |||
pad_before_eq="", pad_after_eq=""): | |||
super().__init__() | |||
self.name = name | |||
self._quotes = None | |||
self.value = value | |||
self.quotes = quotes | |||
self.pad_first = pad_first | |||
self.pad_before_eq = pad_before_eq | |||
self.pad_after_eq = pad_after_eq | |||
def __unicode__(self): | |||
result = self.pad_first + str(self.name) + self.pad_before_eq | |||
@@ -59,10 +55,17 @@ class Attribute(StringMixIn): | |||
@staticmethod | |||
def _value_needs_quotes(val): | |||
"""Return the preferred quotes for the given value, or None.""" | |||
if val and any(char.isspace() for char in val): | |||
return ('"' in val and "'" in val) or ("'" if '"' in val else '"') | |||
return None | |||
"""Return valid quotes for the given value, or None if unneeded.""" | |||
if not val: | |||
return None | |||
val = "".join(str(node) for node in val.filter_text(recursive=False)) | |||
if not any(char.isspace() for char in val): | |||
return None | |||
if "'" in val and '"' not in val: | |||
return '"' | |||
if '"' in val and "'" not in val: | |||
return "'" | |||
return "\"'" # Either acceptable, " preferred over ' | |||
def _set_padding(self, attr, value): | |||
"""Setter for the value of a padding attribute.""" | |||
@@ -79,7 +82,7 @@ class Attribute(StringMixIn): | |||
"""Coerce a quote type into an acceptable value, or raise an error.""" | |||
orig, quotes = quotes, str(quotes) if quotes else None | |||
if quotes not in [None, '"', "'"]: | |||
raise ValueError("{0!r} is not a valid quote type".format(orig)) | |||
raise ValueError("{!r} is not a valid quote type".format(orig)) | |||
return quotes | |||
@property | |||
@@ -123,8 +126,8 @@ class Attribute(StringMixIn): | |||
else: | |||
code = parse_anything(newval) | |||
quotes = self._value_needs_quotes(code) | |||
if quotes in ['"', "'"] or (quotes is True and not self.quotes): | |||
self._quotes = quotes | |||
if quotes and (not self.quotes or self.quotes not in quotes): | |||
self._quotes = quotes[0] | |||
self._value = code | |||
@quotes.setter | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import re | |||
from ...compat import str | |||
from ...string_mixin import StringMixIn | |||
from ...utils import parse_anything | |||
@@ -39,12 +36,10 @@ class Parameter(StringMixIn): | |||
""" | |||
def __init__(self, name, value, showkey=True): | |||
super(Parameter, self).__init__() | |||
if not showkey and not self.can_hide_key(name): | |||
raise ValueError("key {0!r} cannot be hidden".format(name)) | |||
self._name = name | |||
self._value = value | |||
self._showkey = showkey | |||
super().__init__() | |||
self.name = name | |||
self.value = value | |||
self.showkey = showkey | |||
def __unicode__(self): | |||
if self.showkey: | |||
@@ -83,5 +78,6 @@ class Parameter(StringMixIn): | |||
def showkey(self, newval): | |||
newval = bool(newval) | |||
if not newval and not self.can_hide_key(self.name): | |||
raise ValueError("parameter key cannot be hidden") | |||
raise ValueError("parameter key {!r} cannot be hidden".format( | |||
self.name)) | |||
self._showkey = newval |
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
from ..utils import parse_anything | |||
__all__ = ["Heading"] | |||
@@ -32,9 +29,9 @@ class Heading(Node): | |||
"""Represents a section heading in wikicode, like ``== Foo ==``.""" | |||
def __init__(self, title, level): | |||
super(Heading, self).__init__() | |||
self._title = title | |||
self._level = level | |||
super().__init__() | |||
self.title = title | |||
self.level = level | |||
def __unicode__(self): | |||
return ("=" * self.level) + str(self.title) + ("=" * self.level) | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,10 +19,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import html.entities as htmlentities | |||
from . import Node | |||
from ..compat import htmlentities, py3k, str | |||
__all__ = ["HTMLEntity"] | |||
@@ -31,7 +29,7 @@ class HTMLEntity(Node): | |||
"""Represents an HTML entity, like `` ``, either named or unnamed.""" | |||
def __init__(self, value, named=None, hexadecimal=False, hex_char="x"): | |||
super(HTMLEntity, self).__init__() | |||
super().__init__() | |||
self._value = value | |||
if named is None: # Try to guess whether or not the entity is named | |||
try: | |||
@@ -53,42 +51,16 @@ class HTMLEntity(Node): | |||
def __unicode__(self): | |||
if self.named: | |||
return "&{0};".format(self.value) | |||
return "&{};".format(self.value) | |||
if self.hexadecimal: | |||
return "&#{0}{1};".format(self.hex_char, self.value) | |||
return "&#{0};".format(self.value) | |||
return "&#{}{};".format(self.hex_char, self.value) | |||
return "&#{};".format(self.value) | |||
def __strip__(self, **kwargs): | |||
if kwargs.get("normalize"): | |||
return self.normalize() | |||
return self | |||
if not py3k: | |||
@staticmethod | |||
def _unichr(value): | |||
"""Implement builtin unichr() with support for non-BMP code points. | |||
On wide Python builds, this functions like the normal unichr(). On | |||
narrow builds, this returns the value's encoded surrogate pair. | |||
""" | |||
try: | |||
return unichr(value) | |||
except ValueError: | |||
# Test whether we're on the wide or narrow Python build. Check | |||
# the length of a non-BMP code point | |||
# (U+1F64A, SPEAK-NO-EVIL MONKEY): | |||
if len("\U0001F64A") == 1: # pragma: no cover | |||
raise | |||
# Ensure this is within the range we can encode: | |||
if value > 0x10FFFF: | |||
raise ValueError("unichr() arg not in range(0x110000)") | |||
code = value - 0x10000 | |||
if value < 0: # Invalid code point | |||
raise | |||
lead = 0xD800 + (code >> 10) | |||
trail = 0xDC00 + (code % (1 << 10)) | |||
return unichr(lead) + unichr(trail) | |||
@property | |||
def value(self): | |||
"""The string value of the HTML entity.""" | |||
@@ -173,9 +145,8 @@ class HTMLEntity(Node): | |||
def normalize(self): | |||
"""Return the unicode character represented by the HTML entity.""" | |||
chrfunc = chr if py3k else HTMLEntity._unichr | |||
if self.named: | |||
return chrfunc(htmlentities.name2codepoint[self.value]) | |||
return chr(htmlentities.name2codepoint[self.value]) | |||
if self.hexadecimal: | |||
return chrfunc(int(self.value, 16)) | |||
return chrfunc(int(self.value)) | |||
return chr(int(self.value, 16)) | |||
return chr(int(self.value)) |
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,11 +19,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from .extras import Attribute | |||
from ..compat import str | |||
from ..definitions import is_visible | |||
from ..utils import parse_anything | |||
@@ -37,29 +34,21 @@ class Tag(Node): | |||
self_closing=False, invalid=False, implicit=False, padding="", | |||
closing_tag=None, wiki_style_separator=None, | |||
closing_wiki_markup=None): | |||
super(Tag, self).__init__() | |||
self._tag = tag | |||
if contents is None and not self_closing: | |||
self._contents = parse_anything("") | |||
else: | |||
self._contents = contents | |||
super().__init__() | |||
self.tag = tag | |||
self.contents = contents | |||
self._attrs = attrs if attrs else [] | |||
self._wiki_markup = wiki_markup | |||
self._self_closing = self_closing | |||
self._invalid = invalid | |||
self._implicit = implicit | |||
self._padding = padding | |||
if closing_tag: | |||
self._closing_tag = closing_tag | |||
else: | |||
self._closing_tag = tag | |||
self._wiki_style_separator = wiki_style_separator | |||
self._closing_wiki_markup = None | |||
self.wiki_markup = wiki_markup | |||
self.self_closing = self_closing | |||
self.invalid = invalid | |||
self.implicit = implicit | |||
self.padding = padding | |||
if closing_tag is not None: | |||
self.closing_tag = closing_tag | |||
self.wiki_style_separator = wiki_style_separator | |||
if closing_wiki_markup is not None: | |||
self._closing_wiki_markup = closing_wiki_markup | |||
elif wiki_markup and not self_closing: | |||
self._closing_wiki_markup = wiki_markup | |||
else: | |||
self._closing_wiki_markup = None | |||
self.closing_wiki_markup = closing_wiki_markup | |||
def __unicode__(self): | |||
if self.wiki_markup: | |||
@@ -69,10 +58,10 @@ class Tag(Node): | |||
attrs = "" | |||
padding = self.padding or "" | |||
separator = self.wiki_style_separator or "" | |||
close = self.closing_wiki_markup or "" | |||
if self.self_closing: | |||
return self.wiki_markup + attrs + padding + separator | |||
else: | |||
close = self.closing_wiki_markup or "" | |||
return self.wiki_markup + attrs + padding + separator + \ | |||
str(self.contents) + close | |||
@@ -93,10 +82,10 @@ class Tag(Node): | |||
yield attr.name | |||
if attr.value is not None: | |||
yield attr.value | |||
if self.contents: | |||
if not self.self_closing: | |||
yield self.contents | |||
if not self.self_closing and not self.wiki_markup and self.closing_tag: | |||
yield self.closing_tag | |||
if not self.wiki_markup and self.closing_tag: | |||
yield self.closing_tag | |||
def __strip__(self, **kwargs): | |||
if self.contents and is_visible(self.tag): | |||
@@ -308,7 +297,10 @@ class Tag(Node): | |||
return attr | |||
def remove(self, name): | |||
"""Remove all attributes with the given *name*.""" | |||
"""Remove all attributes with the given *name*. | |||
Raises :exc:`ValueError` if none were found. | |||
""" | |||
attrs = [attr for attr in self.attributes if attr.name == name.strip()] | |||
if not attrs: | |||
raise ValueError(name) | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,13 +19,11 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from collections import defaultdict | |||
import re | |||
from . import HTMLEntity, Node, Text | |||
from .extras import Parameter | |||
from ..compat import range, str | |||
from ..utils import parse_anything | |||
__all__ = ["Template"] | |||
@@ -37,8 +34,8 @@ class Template(Node): | |||
"""Represents a template in wikicode, like ``{{foo}}``.""" | |||
def __init__(self, name, params=None): | |||
super(Template, self).__init__() | |||
self._name = name | |||
super().__init__() | |||
self.name = name | |||
if params: | |||
self._params = params | |||
else: | |||
@@ -108,7 +105,7 @@ class Template(Node): | |||
def _blank_param_value(value): | |||
"""Remove the content from *value* while keeping its whitespace. | |||
Replace *value*\ 's nodes with two text nodes, the first containing | |||
Replace *value*\\ 's nodes with two text nodes, the first containing | |||
whitespace from before its content and the second containing whitespace | |||
from after its content. | |||
""" | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
__all__ = ["Text"] | |||
@@ -31,8 +28,8 @@ class Text(Node): | |||
"""Represents ordinary, unformatted text with no special properties.""" | |||
def __init__(self, value): | |||
super(Text, self).__init__() | |||
self._value = value | |||
super().__init__() | |||
self.value = value | |||
def __unicode__(self): | |||
return self.value | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str | |||
from ..utils import parse_anything | |||
__all__ = ["Wikilink"] | |||
@@ -32,9 +29,9 @@ class Wikilink(Node): | |||
"""Represents an internal wikilink, like ``[[Foo|Bar]]``.""" | |||
def __init__(self, title, text=None): | |||
super(Wikilink, self).__init__() | |||
self._title = title | |||
self._text = text | |||
super().__init__() | |||
self.title = title | |||
self.text = text | |||
def __unicode__(self): | |||
if self.text is not None: | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -35,8 +34,8 @@ class ParserError(Exception): | |||
can happen. Its appearance indicates a bug. | |||
""" | |||
def __init__(self, extra): | |||
msg = "This is a bug and should be reported. Info: {0}.".format(extra) | |||
super(ParserError, self).__init__(msg) | |||
msg = "This is a bug and should be reported. Info: {}.".format(extra) | |||
super().__init__(msg) | |||
from .builder import Builder | |||
@@ -50,13 +49,13 @@ except ImportError: | |||
__all__ = ["use_c", "Parser", "ParserError"] | |||
class Parser(object): | |||
class Parser: | |||
"""Represents a parser for wikicode. | |||
Actual parsing is a two-step process: first, the text is split up into a | |||
series of tokens by the :class:`.Tokenizer`, and then the tokens are | |||
converted into trees of :class:`.Wikicode` objects and :class:`.Node`\ s by | |||
the :class:`.Builder`. | |||
converted into trees of :class:`.Wikicode` objects and :class:`.Node`\\ s | |||
by the :class:`.Builder`. | |||
Instances of this class or its dependents (:class:`.Tokenizer` and | |||
:class:`.Builder`) should not be shared between threads. :meth:`parse` can | |||
@@ -79,7 +78,7 @@ class Parser(object): | |||
If given, *context* will be passed as a starting context to the parser. | |||
This is helpful when this function is used inside node attribute | |||
setters. For example, :class:`.ExternalLink`\ 's | |||
setters. For example, :class:`.ExternalLink`\\ 's | |||
:attr:`~.ExternalLink.url` setter sets *context* to | |||
:mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself | |||
from becoming an :class:`.ExternalLink`. | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,10 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import tokens, ParserError | |||
from ..compat import str | |||
from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, | |||
Template, Text, Wikilink) | |||
from ..nodes.extras import Attribute, Parameter | |||
@@ -45,10 +42,10 @@ def _add_handler(token_type): | |||
return decorator | |||
class Builder(object): | |||
class Builder: | |||
"""Builds a tree of nodes out of a sequence of tokens. | |||
To use, pass a list of :class:`.Token`\ s to the :meth:`build` method. The | |||
To use, pass a list of :class:`.Token`\\ s to the :meth:`build` method. The | |||
list will be exhausted as it is parsed and a :class:`.Wikicode` object | |||
containing the node tree will be returned. | |||
""" | |||
@@ -237,8 +234,7 @@ class Builder(object): | |||
else: | |||
name, value = self._pop(), None | |||
return Attribute(name, value, quotes, start.pad_first, | |||
start.pad_before_eq, start.pad_after_eq, | |||
check_quotes=False) | |||
start.pad_before_eq, start.pad_after_eq) | |||
else: | |||
self._write(self._handle_token(token)) | |||
raise ParserError("_handle_attribute() missed a close token") | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -193,3 +192,16 @@ UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + | |||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN | |||
NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI | |||
NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK | |||
def describe(context):
    """Return a string describing the given context value, for debugging."""
    # Scan this module's namespace for primitive context flags: integers with
    # exactly one bit set. Multi-bit values are aggregate contexts and GL_*
    # names are global flags, so both are excluded from the description.
    matched = [
        (value, name)
        for name, value in globals().items()
        if isinstance(value, int)
        and not name.startswith("GL_")
        and bin(value).count("1") == 1
        and context & value
    ]
    matched.sort(key=lambda pair: pair[0])
    return "|".join(name for _, name in matched)
@@ -23,7 +23,7 @@ SOFTWARE. | |||
#pragma once | |||
#ifndef PY_SSIZE_T_CLEAN | |||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html | |||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html | |||
#endif | |||
#include <Python.h> | |||
@@ -34,10 +34,6 @@ SOFTWARE. | |||
/* Compatibility macros */ | |||
#if PY_MAJOR_VERSION >= 3 | |||
#define IS_PY3K | |||
#endif | |||
#ifndef uint64_t | |||
#define uint64_t unsigned PY_LONG_LONG | |||
#endif | |||
@@ -48,20 +44,8 @@ SOFTWARE. | |||
/* Unicode support macros */ | |||
#if defined(IS_PY3K) && PY_MINOR_VERSION >= 3 | |||
#define PEP_393 | |||
#endif | |||
#ifdef PEP_393 | |||
#define Unicode Py_UCS4 | |||
#define PyUnicode_FROM_SINGLE(chr) \ | |||
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) | |||
#else | |||
#define Unicode Py_UNICODE | |||
#define PyUnicode_FROM_SINGLE(chr) \ | |||
PyUnicode_FromUnicode(&(chr), 1) | |||
#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE | |||
#endif | |||
/* Error handling macros */ | |||
@@ -85,13 +69,9 @@ extern PyObject* definitions; | |||
typedef struct { | |||
Py_ssize_t capacity; | |||
Py_ssize_t length; | |||
#ifdef PEP_393 | |||
PyObject* object; | |||
int kind; | |||
void* data; | |||
#else | |||
Py_UNICODE* data; | |||
#endif | |||
} Textbuffer; | |||
typedef struct { | |||
@@ -111,12 +91,8 @@ typedef struct Stack Stack; | |||
typedef struct { | |||
PyObject* object; /* base PyUnicodeObject object */ | |||
Py_ssize_t length; /* length of object, in code points */ | |||
#ifdef PEP_393 | |||
int kind; /* object's kind value */ | |||
void* data; /* object's raw unicode buffer */ | |||
#else | |||
Py_UNICODE* buf; /* object's internal buffer */ | |||
#endif | |||
} TokenizerInput; | |||
typedef struct avl_tree_node avl_tree; | |||
@@ -45,11 +45,12 @@ static const char* PARSER_BLACKLIST[] = { | |||
}; | |||
static const char* SINGLE[] = { | |||
"br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL | |||
"br", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", | |||
"wbr", NULL | |||
}; | |||
static const char* SINGLE_ONLY[] = { | |||
"br", "hr", "meta", "link", "img", NULL | |||
"br", "hr", "meta", "link", "img", "wbr", NULL | |||
}; | |||
/* | |||
@@ -32,7 +32,7 @@ typedef struct { | |||
Textbuffer* pad_first; | |||
Textbuffer* pad_before_eq; | |||
Textbuffer* pad_after_eq; | |||
Unicode quoter; | |||
Py_UCS4 quoter; | |||
Py_ssize_t reset; | |||
} TagData; | |||
@@ -29,23 +29,16 @@ SOFTWARE. | |||
/* | |||
Internal allocation function for textbuffers. | |||
*/ | |||
static int internal_alloc(Textbuffer* self, Unicode maxchar) | |||
static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) | |||
{ | |||
self->capacity = INITIAL_CAPACITY; | |||
self->length = 0; | |||
#ifdef PEP_393 | |||
self->object = PyUnicode_New(self->capacity, maxchar); | |||
if (!self->object) | |||
return -1; | |||
self->kind = PyUnicode_KIND(self->object); | |||
self->data = PyUnicode_DATA(self->object); | |||
#else | |||
(void) maxchar; // Unused | |||
self->data = malloc(sizeof(Unicode) * self->capacity); | |||
if (!self->data) | |||
return -1; | |||
#endif | |||
return 0; | |||
} | |||
@@ -55,11 +48,7 @@ static int internal_alloc(Textbuffer* self, Unicode maxchar) | |||
*/ | |||
static void internal_dealloc(Textbuffer* self) | |||
{ | |||
#ifdef PEP_393 | |||
Py_DECREF(self->object); | |||
#else | |||
free(self->data); | |||
#endif | |||
} | |||
/* | |||
@@ -67,7 +56,6 @@ static void internal_dealloc(Textbuffer* self) | |||
*/ | |||
static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||
{ | |||
#ifdef PEP_393 | |||
PyObject *newobj; | |||
void *newdata; | |||
@@ -79,10 +67,6 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||
Py_DECREF(self->object); | |||
self->object = newobj; | |||
self->data = newdata; | |||
#else | |||
if (!(self->data = realloc(self->data, sizeof(Unicode) * new_cap))) | |||
return -1; | |||
#endif | |||
self->capacity = new_cap; | |||
return 0; | |||
@@ -94,11 +78,9 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||
Textbuffer* Textbuffer_new(TokenizerInput* text) | |||
{ | |||
Textbuffer* self = malloc(sizeof(Textbuffer)); | |||
Unicode maxchar = 0; | |||
Py_UCS4 maxchar = 0; | |||
#ifdef PEP_393 | |||
maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); | |||
#endif | |||
if (!self) | |||
goto fail_nomem; | |||
@@ -127,11 +109,9 @@ void Textbuffer_dealloc(Textbuffer* self) | |||
*/ | |||
int Textbuffer_reset(Textbuffer* self) | |||
{ | |||
Unicode maxchar = 0; | |||
Py_UCS4 maxchar = 0; | |||
#ifdef PEP_393 | |||
maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); | |||
#endif | |||
internal_dealloc(self); | |||
if (internal_alloc(self, maxchar)) | |||
@@ -142,18 +122,14 @@ int Textbuffer_reset(Textbuffer* self) | |||
/* | |||
Write a Unicode codepoint to the given textbuffer. | |||
*/ | |||
int Textbuffer_write(Textbuffer* self, Unicode code) | |||
int Textbuffer_write(Textbuffer* self, Py_UCS4 code) | |||
{ | |||
if (self->length >= self->capacity) { | |||
if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) | |||
return -1; | |||
} | |||
#ifdef PEP_393 | |||
PyUnicode_WRITE(self->kind, self->data, self->length++, code); | |||
#else | |||
self->data[self->length++] = code; | |||
#endif | |||
return 0; | |||
} | |||
@@ -163,13 +139,9 @@ int Textbuffer_write(Textbuffer* self, Unicode code) | |||
This function does not check for bounds. | |||
*/ | |||
Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||
Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||
{ | |||
#ifdef PEP_393 | |||
return PyUnicode_READ(self->kind, self->data, index); | |||
#else | |||
return self->data[index]; | |||
#endif | |||
} | |||
/* | |||
@@ -177,11 +149,7 @@ Unicode Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||
*/ | |||
PyObject* Textbuffer_render(Textbuffer* self) | |||
{ | |||
#ifdef PEP_393 | |||
return PyUnicode_FromKindAndData(self->kind, self->data, self->length); | |||
#else | |||
return PyUnicode_FromUnicode(self->data, self->length); | |||
#endif | |||
} | |||
/* | |||
@@ -196,14 +164,9 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||
return -1; | |||
} | |||
#ifdef PEP_393 | |||
assert(self->kind == other->kind); | |||
memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, | |||
other->length * other->kind); | |||
#else | |||
memcpy(self->data + self->length, other->data, | |||
other->length * sizeof(Unicode)); | |||
#endif | |||
self->length = newlen; | |||
return 0; | |||
@@ -215,18 +178,12 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||
void Textbuffer_reverse(Textbuffer* self) | |||
{ | |||
Py_ssize_t i, end = self->length - 1; | |||
Unicode tmp; | |||
Py_UCS4 tmp; | |||
for (i = 0; i < self->length / 2; i++) { | |||
#ifdef PEP_393 | |||
tmp = PyUnicode_READ(self->kind, self->data, i); | |||
PyUnicode_WRITE(self->kind, self->data, i, | |||
PyUnicode_READ(self->kind, self->data, end - i)); | |||
PyUnicode_WRITE(self->kind, self->data, end - i, tmp); | |||
#else | |||
tmp = self->data[i]; | |||
self->data[i] = self->data[end - i]; | |||
self->data[end - i] = tmp; | |||
#endif | |||
} | |||
} |
@@ -29,8 +29,8 @@ SOFTWARE. | |||
Textbuffer* Textbuffer_new(TokenizerInput*); | |||
void Textbuffer_dealloc(Textbuffer*); | |||
int Textbuffer_reset(Textbuffer*); | |||
int Textbuffer_write(Textbuffer*, Unicode); | |||
Unicode Textbuffer_read(Textbuffer*, Py_ssize_t); | |||
int Textbuffer_write(Textbuffer*, Py_UCS4); | |||
Py_UCS4 Textbuffer_read(Textbuffer*, Py_ssize_t); | |||
PyObject* Textbuffer_render(Textbuffer*); | |||
int Textbuffer_concat(Textbuffer*, Textbuffer*); | |||
void Textbuffer_reverse(Textbuffer*); |
@@ -1,5 +1,5 @@ | |||
/* | |||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -52,7 +52,7 @@ static int Tokenizer_parse_tag(Tokenizer*); | |||
/* | |||
Determine whether the given code point is a marker. | |||
*/ | |||
static int is_marker(Unicode this) | |||
static int is_marker(Py_UCS4 this) | |||
{ | |||
int i; | |||
@@ -442,7 +442,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) | |||
static const char* valid = URISCHEME; | |||
Textbuffer* buffer; | |||
PyObject* scheme; | |||
Unicode this; | |||
Py_UCS4 this; | |||
int slashes, i; | |||
if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0) | |||
@@ -463,7 +463,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) | |||
while (1) { | |||
if (!valid[i]) | |||
goto end_of_loop; | |||
if (this == (Unicode) valid[i]) | |||
if (this == (Py_UCS4) valid[i]) | |||
break; | |||
i++; | |||
} | |||
@@ -516,7 +516,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) | |||
static const char* valid = URISCHEME; | |||
Textbuffer *scheme_buffer = Textbuffer_new(&self->text); | |||
PyObject *scheme; | |||
Unicode chunk; | |||
Py_UCS4 chunk; | |||
Py_ssize_t i; | |||
int slashes, j; | |||
uint64_t new_context; | |||
@@ -536,7 +536,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) | |||
FAIL_ROUTE(0); | |||
return 0; | |||
} | |||
} while (chunk != (Unicode) valid[j++]); | |||
} while (chunk != (Py_UCS4) valid[j++]); | |||
Textbuffer_write(scheme_buffer, chunk); | |||
} | |||
end_of_loop: | |||
@@ -580,7 +580,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) | |||
Handle text in a free external link, including trailing punctuation. | |||
*/ | |||
static int Tokenizer_handle_free_link_text( | |||
Tokenizer* self, int* parens, Textbuffer* tail, Unicode this) | |||
Tokenizer* self, int* parens, Textbuffer* tail, Py_UCS4 this) | |||
{ | |||
#define PUSH_TAIL_BUFFER(tail, error) \ | |||
if (tail && tail->length > 0) { \ | |||
@@ -607,10 +607,10 @@ static int Tokenizer_handle_free_link_text( | |||
Return whether the current head is the end of a free link. | |||
*/ | |||
static int | |||
Tokenizer_is_free_link(Tokenizer* self, Unicode this, Unicode next) | |||
Tokenizer_is_free_link(Tokenizer* self, Py_UCS4 this, Py_UCS4 next) | |||
{ | |||
// Built from Tokenizer_parse()'s end sentinels: | |||
Unicode after = Tokenizer_read(self, 2); | |||
Py_UCS4 after = Tokenizer_read(self, 2); | |||
uint64_t ctx = self->topstack->context; | |||
return (!this || this == '\n' || this == '[' || this == ']' || | |||
@@ -628,7 +628,7 @@ static PyObject* | |||
Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||
Textbuffer* extra) | |||
{ | |||
Unicode this, next; | |||
Py_UCS4 this, next; | |||
int parens = 0; | |||
if (brackets ? Tokenizer_parse_bracketed_uri_scheme(self) : | |||
@@ -813,11 +813,10 @@ static int Tokenizer_parse_heading(Tokenizer* self) | |||
self->global ^= GL_HEADING; | |||
return 0; | |||
} | |||
#ifdef IS_PY3K | |||
if (!heading) { | |||
return -1; | |||
} | |||
level = PyLong_FromSsize_t(heading->level); | |||
#else | |||
level = PyInt_FromSsize_t(heading->level); | |||
#endif | |||
if (!level) { | |||
Py_DECREF(heading->title); | |||
free(heading); | |||
@@ -892,6 +891,9 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
self->head = reset + best - 1; | |||
} | |||
else { | |||
if (!after) { | |||
return NULL; | |||
} | |||
for (i = 0; i < best; i++) { | |||
if (Tokenizer_emit_char(self, '=')) { | |||
Py_DECREF(after->title); | |||
@@ -927,7 +929,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) | |||
static int Tokenizer_really_parse_entity(Tokenizer* self) | |||
{ | |||
PyObject *kwargs, *charobj, *textobj; | |||
Unicode this; | |||
Py_UCS4 this; | |||
int numeric, hexadecimal, i, j, zeroes, test; | |||
char *valid, *text, *buffer, *def; | |||
@@ -1008,7 +1010,7 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) | |||
while (1) { | |||
if (!valid[j]) | |||
FAIL_ROUTE_AND_EXIT() | |||
if (this == (Unicode) valid[j]) | |||
if (this == (Py_UCS4) valid[j]) | |||
break; | |||
j++; | |||
} | |||
@@ -1105,7 +1107,7 @@ static int Tokenizer_parse_comment(Tokenizer* self) | |||
{ | |||
Py_ssize_t reset = self->head + 3; | |||
PyObject *comment; | |||
Unicode this; | |||
Py_UCS4 this; | |||
self->head += 4; | |||
if (Tokenizer_push(self, 0)) | |||
@@ -1205,7 +1207,7 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) | |||
Handle whitespace inside of an HTML open tag. | |||
*/ | |||
static int Tokenizer_handle_tag_space( | |||
Tokenizer* self, TagData* data, Unicode text) | |||
Tokenizer* self, TagData* data, Py_UCS4 text) | |||
{ | |||
uint64_t ctx = data->context; | |||
uint64_t end_of_value = (ctx & TAG_ATTR_VALUE && | |||
@@ -1237,9 +1239,9 @@ static int Tokenizer_handle_tag_space( | |||
/* | |||
Handle regular text inside of an HTML open tag. | |||
*/ | |||
static int Tokenizer_handle_tag_text(Tokenizer* self, Unicode text) | |||
static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UCS4 text) | |||
{ | |||
Unicode next = Tokenizer_read(self, 1); | |||
Py_UCS4 next = Tokenizer_read(self, 1); | |||
if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self)) | |||
return Tokenizer_emit_char(self, text); | |||
@@ -1256,7 +1258,7 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Unicode text) | |||
Handle all sorts of text data inside of an HTML open tag. | |||
*/ | |||
static int Tokenizer_handle_tag_data( | |||
Tokenizer* self, TagData* data, Unicode chunk) | |||
Tokenizer* self, TagData* data, Py_UCS4 chunk) | |||
{ | |||
PyObject *trash; | |||
int first_time, escaped; | |||
@@ -1438,7 +1440,7 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | |||
{ | |||
Textbuffer* buffer; | |||
PyObject *buf_tmp, *end_tag, *start_tag; | |||
Unicode this, next; | |||
Py_UCS4 this, next; | |||
Py_ssize_t reset; | |||
int cmp; | |||
@@ -1594,7 +1596,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) | |||
{ | |||
TagData *data = TagData_new(&self->text); | |||
PyObject *token, *text, *trash; | |||
Unicode this, next; | |||
Py_UCS4 this, next; | |||
int can_exit; | |||
if (!data) | |||
@@ -1680,7 +1682,7 @@ static int Tokenizer_handle_invalid_tag_start(Tokenizer* self) | |||
Py_ssize_t reset = self->head + 1, pos = 0; | |||
Textbuffer* buf; | |||
PyObject *name, *tag; | |||
Unicode this; | |||
Py_UCS4 this; | |||
self->head += 2; | |||
buf = Textbuffer_new(&self->text); | |||
@@ -1801,6 +1803,11 @@ static int Tokenizer_parse_italics(Tokenizer* self) | |||
if (BAD_ROUTE_CONTEXT & LC_STYLE_PASS_AGAIN) { | |||
context = LC_STYLE_ITALICS | LC_STYLE_SECOND_PASS; | |||
stack = Tokenizer_parse(self, context, 1); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
self->head = reset; | |||
return Tokenizer_emit_text(self, "''"); | |||
} | |||
} | |||
else | |||
return Tokenizer_emit_text(self, "''"); | |||
@@ -1977,7 +1984,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self) | |||
static int Tokenizer_handle_list_marker(Tokenizer* self) | |||
{ | |||
PyObject *kwargs, *markup; | |||
Unicode code = Tokenizer_read(self, 0); | |||
Py_UCS4 code = Tokenizer_read(self, 0); | |||
if (code == ';') | |||
self->topstack->context |= LC_DLTERM; | |||
@@ -2004,7 +2011,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self) | |||
*/ | |||
static int Tokenizer_handle_list(Tokenizer* self) | |||
{ | |||
Unicode marker = Tokenizer_read(self, 1); | |||
Py_UCS4 marker = Tokenizer_read(self, 1); | |||
if (Tokenizer_handle_list_marker(self)) | |||
return -1; | |||
@@ -2158,11 +2165,11 @@ Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, | |||
/* | |||
Handle style attributes for a table until an ending token. | |||
*/ | |||
static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token) | |||
static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Py_UCS4 end_token) | |||
{ | |||
TagData *data = TagData_new(&self->text); | |||
PyObject *padding, *trash; | |||
Unicode this; | |||
Py_UCS4 this; | |||
int can_exit; | |||
if (!data) | |||
@@ -2254,6 +2261,7 @@ static int Tokenizer_parse_table(Tokenizer* self) | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
while (!Tokenizer_IS_CURRENT_STACK(self, restore_point)) { | |||
Tokenizer_memoize_bad_route(self); | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
} | |||
@@ -2471,7 +2479,7 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) | |||
everything is safe, or -1 if the route must be failed. | |||
*/ | |||
static int | |||
Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Unicode data) | |||
Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UCS4 data) | |||
{ | |||
if (context & LC_FAIL_NEXT) | |||
return -1; | |||
@@ -2556,7 +2564,7 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Unicode data) | |||
static int Tokenizer_has_leading_whitespace(Tokenizer* self) | |||
{ | |||
int offset = 1; | |||
Unicode current_character; | |||
Py_UCS4 current_character; | |||
while (1) { | |||
current_character = Tokenizer_read_backwards(self, offset); | |||
if (!current_character || current_character == '\n') | |||
@@ -2574,7 +2582,7 @@ static int Tokenizer_has_leading_whitespace(Tokenizer* self) | |||
PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||
{ | |||
uint64_t this_context; | |||
Unicode this, next, next_next, last; | |||
Py_UCS4 this, next, next_next, last; | |||
PyObject* temp; | |||
if (push) { | |||
@@ -2603,6 +2611,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||
} | |||
if (!this) | |||
return Tokenizer_handle_end(self, this_context); | |||
if (PyErr_CheckSignals()) | |||
return NULL; | |||
next = Tokenizer_read(self, 1); | |||
last = Tokenizer_read_backwards(self, 1); | |||
if (this == next && next == '{') { | |||
@@ -24,7 +24,7 @@ SOFTWARE. | |||
#include "common.h" | |||
static const Unicode MARKERS[] = { | |||
static const Py_UCS4 MARKERS[] = { | |||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||
'-', '!', '\n', '\0'}; | |||
@@ -275,7 +275,7 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||
/* | |||
Write a Unicode codepoint to the current textbuffer. | |||
*/ | |||
int Tokenizer_emit_char(Tokenizer* self, Unicode code) | |||
int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) | |||
{ | |||
return Textbuffer_write(self->topstack->textbuffer, code); | |||
} | |||
@@ -389,19 +389,15 @@ int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||
/* | |||
Internal function to read the codepoint at the given index from the input. | |||
*/ | |||
static Unicode read_codepoint(TokenizerInput* text, Py_ssize_t index) | |||
static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) | |||
{ | |||
#ifdef PEP_393 | |||
return PyUnicode_READ(text->kind, text->data, index); | |||
#else | |||
return text->buf[index]; | |||
#endif | |||
} | |||
/* | |||
Read the value at a relative point in the wikicode, forwards. | |||
*/ | |||
Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
Py_UCS4 Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index = self->head + delta; | |||
@@ -413,7 +409,7 @@ Unicode Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
/* | |||
Read the value at a relative point in the wikicode, backwards. | |||
*/ | |||
Unicode Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||
Py_UCS4 Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index; | |||
@@ -38,14 +38,14 @@ void Tokenizer_free_bad_route_tree(Tokenizer*); | |||
int Tokenizer_emit_token(Tokenizer*, PyObject*, int); | |||
int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); | |||
int Tokenizer_emit_char(Tokenizer*, Unicode); | |||
int Tokenizer_emit_char(Tokenizer*, Py_UCS4); | |||
int Tokenizer_emit_text(Tokenizer*, const char*); | |||
int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*); | |||
int Tokenizer_emit_all(Tokenizer*, PyObject*); | |||
int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); | |||
Unicode Tokenizer_read(Tokenizer*, Py_ssize_t); | |||
Unicode Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||
Py_UCS4 Tokenizer_read(Tokenizer*, Py_ssize_t); | |||
Py_UCS4 Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||
/* Macros */ | |||
@@ -85,12 +85,8 @@ static void init_tokenizer_text(TokenizerInput* text) | |||
text->object = Py_None; | |||
Py_INCREF(Py_None); | |||
text->length = 0; | |||
#ifdef PEP_393 | |||
text->kind = PyUnicode_WCHAR_KIND; | |||
text->data = NULL; | |||
#else | |||
text->buf = NULL; | |||
#endif | |||
} | |||
/* | |||
@@ -119,14 +115,10 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input) | |||
dealloc_tokenizer_text(text); | |||
text->object = input; | |||
#ifdef PEP_393 | |||
if (PyUnicode_READY(input) < 0) | |||
return -1; | |||
text->kind = PyUnicode_KIND(input); | |||
text->data = PyUnicode_DATA(input); | |||
#else | |||
text->buf = PyUnicode_AS_UNICODE(input); | |||
#endif | |||
text->length = PyUnicode_GET_LENGTH(input); | |||
return 0; | |||
} | |||
@@ -192,11 +184,9 @@ static int load_entities(void) | |||
{ | |||
PyObject *tempmod, *defmap, *deflist; | |||
unsigned numdefs, i; | |||
#ifdef IS_PY3K | |||
PyObject *string; | |||
#endif | |||
tempmod = PyImport_ImportModule(ENTITYDEFS_MODULE); | |||
tempmod = PyImport_ImportModule("html.entities"); | |||
if (!tempmod) | |||
return -1; | |||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | |||
@@ -207,19 +197,15 @@ static int load_entities(void) | |||
if (!deflist) | |||
return -1; | |||
Py_DECREF(defmap); | |||
numdefs = (unsigned) PyList_GET_SIZE(defmap); | |||
numdefs = (unsigned) PyList_GET_SIZE(deflist); | |||
entitydefs = calloc(numdefs + 1, sizeof(char*)); | |||
if (!entitydefs) | |||
return -1; | |||
for (i = 0; i < numdefs; i++) { | |||
#ifdef IS_PY3K | |||
string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); | |||
if (!string) | |||
return -1; | |||
entitydefs[i] = PyBytes_AsString(string); | |||
#else | |||
entitydefs[i] = PyBytes_AsString(PyList_GET_ITEM(deflist, i)); | |||
#endif | |||
if (!entitydefs[i]) | |||
return -1; | |||
} | |||
@@ -233,7 +219,7 @@ static int load_tokens(void) | |||
*globals = PyEval_GetGlobals(), | |||
*locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), | |||
*modname = IMPORT_NAME_FUNC("tokens"); | |||
*modname = PyUnicode_FromString("tokens"); | |||
char *name = "mwparserfromhell.parser"; | |||
if (!fromlist || !modname) | |||
@@ -256,7 +242,7 @@ static int load_defs(void) | |||
*globals = PyEval_GetGlobals(), | |||
*locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), | |||
*modname = IMPORT_NAME_FUNC("definitions"); | |||
*modname = PyUnicode_FromString("definitions"); | |||
char *name = "mwparserfromhell"; | |||
if (!fromlist || !modname) | |||
@@ -277,7 +263,7 @@ static int load_exceptions(void) | |||
*globals = PyEval_GetGlobals(), | |||
*locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), | |||
*modname = IMPORT_NAME_FUNC("parser"); | |||
*modname = PyUnicode_FromString("parser"); | |||
char *name = "mwparserfromhell"; | |||
if (!fromlist || !modname) | |||
@@ -294,24 +280,22 @@ static int load_exceptions(void) | |||
return 0; | |||
} | |||
PyMODINIT_FUNC INIT_FUNC_NAME(void) | |||
PyMODINIT_FUNC PyInit__tokenizer(void) | |||
{ | |||
PyObject *module; | |||
TokenizerType.tp_new = PyType_GenericNew; | |||
if (PyType_Ready(&TokenizerType) < 0) | |||
INIT_ERROR; | |||
module = CREATE_MODULE; | |||
return NULL; | |||
module = PyModule_Create(&module_def); | |||
if (!module) | |||
INIT_ERROR; | |||
return NULL; | |||
Py_INCREF(&TokenizerType); | |||
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | |||
Py_INCREF(Py_True); | |||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | |||
NOARGS = PyTuple_New(0); | |||
if (!NOARGS || load_entities() || load_tokens() || load_defs()) | |||
INIT_ERROR; | |||
#ifdef IS_PY3K | |||
return NULL; | |||
return module; | |||
#endif | |||
} |
@@ -32,22 +32,6 @@ static void Tokenizer_dealloc(Tokenizer*); | |||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
/* Compatibility macros */ | |||
#ifdef IS_PY3K | |||
#define IMPORT_NAME_FUNC PyUnicode_FromString | |||
#define CREATE_MODULE PyModule_Create(&module_def); | |||
#define ENTITYDEFS_MODULE "html.entities" | |||
#define INIT_FUNC_NAME PyInit__tokenizer | |||
#define INIT_ERROR return NULL | |||
#else | |||
#define IMPORT_NAME_FUNC PyBytes_FromString | |||
#define CREATE_MODULE Py_InitModule("_tokenizer", NULL); | |||
#define ENTITYDEFS_MODULE "htmlentitydefs" | |||
#define INIT_FUNC_NAME init_tokenizer | |||
#define INIT_ERROR return | |||
#endif | |||
/* Structs */ | |||
static PyMethodDef Tokenizer_methods[] = { | |||
@@ -101,11 +85,9 @@ static PyTypeObject TokenizerType = { | |||
Tokenizer_new, /* tp_new */ | |||
}; | |||
#ifdef IS_PY3K | |||
static PyModuleDef module_def = { | |||
PyModuleDef_HEAD_INIT, | |||
"_tokenizer", | |||
"Creates a list of tokens from a string of wikicode.", | |||
-1, NULL, NULL, NULL, NULL, NULL | |||
}; | |||
#endif |
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,12 +19,11 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import html.entities as htmlentities | |||
from math import log | |||
import re | |||
from . import contexts, tokens, ParserError | |||
from ..compat import htmlentities, range | |||
from ..definitions import (get_html_tag, is_parsable, is_single, | |||
is_single_only, is_scheme) | |||
@@ -35,11 +33,11 @@ class BadRoute(Exception): | |||
"""Raised internally when the current tokenization route is invalid.""" | |||
def __init__(self, context=0): | |||
super(BadRoute, self).__init__() | |||
super().__init__() | |||
self.context = context | |||
class _TagOpenData(object): | |||
class _TagOpenData: | |||
"""Stores data about an HTML open tag, like ``<ref name="foo">``.""" | |||
CX_NAME = 1 << 0 | |||
CX_ATTR_READY = 1 << 1 | |||
@@ -57,7 +55,7 @@ class _TagOpenData(object): | |||
self.reset = 0 | |||
class Tokenizer(object): | |||
class Tokenizer: | |||
"""Creates a list of tokens from a string of wikicode.""" | |||
USES_C = False | |||
START = object() | |||
@@ -455,7 +453,7 @@ class Tokenizer(object): | |||
else: | |||
self._parse_free_uri_scheme() | |||
invalid = ("\n", " ", "[", "]") | |||
punct = tuple(",;\.:!?)") | |||
punct = tuple(",;\\.:!?)") | |||
if self._read() is self.END or self._read()[0] in invalid: | |||
self._fail_route() | |||
tail = "" | |||
@@ -931,7 +929,11 @@ class Tokenizer(object): | |||
self._head = reset | |||
if route.context & contexts.STYLE_PASS_AGAIN: | |||
new_ctx = contexts.STYLE_ITALICS | contexts.STYLE_SECOND_PASS | |||
stack = self._parse(new_ctx) | |||
try: | |||
stack = self._parse(new_ctx) | |||
except BadRoute: | |||
self._head = reset | |||
return self._emit_text("''") | |||
else: | |||
return self._emit_text("''") | |||
self._emit_style_tag("i", "''", stack) | |||
@@ -1133,6 +1135,7 @@ class Tokenizer(object): | |||
table = self._parse(contexts.TABLE_OPEN) | |||
except BadRoute: | |||
while self._stack_ident != restore_point: | |||
self._memoize_bad_route() | |||
self._pop() | |||
self._head = reset | |||
self._emit_text("{") | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -28,9 +27,6 @@ a syntactically valid form by the :class:`.Tokenizer`, and then converted into | |||
the :class`.Wikicode` tree by the :class:`.Builder`. | |||
""" | |||
from __future__ import unicode_literals | |||
from ..compat import py3k, str | |||
__all__ = ["Token"] | |||
@@ -44,7 +40,7 @@ class Token(dict): | |||
args.append(key + "=" + repr(value[:97] + "...")) | |||
else: | |||
args.append(key + "=" + repr(value)) | |||
return "{0}({1})".format(type(self).__name__, ", ".join(args)) | |||
return "{}({})".format(type(self).__name__, ", ".join(args)) | |||
def __eq__(self, other): | |||
return isinstance(other, type(self)) and dict.__eq__(self, other) | |||
@@ -65,7 +61,7 @@ class Token(dict): | |||
def make(name): | |||
"""Create a new Token class using ``type()`` and add it to ``__all__``.""" | |||
__all__.append(name) | |||
return type(name if py3k else name.encode("utf8"), (Token,), {}) | |||
return type(name, (Token,), {}) | |||
Text = make("Text") | |||
@@ -1,456 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
This module contains the :class:`.SmartList` type, as well as its | |||
:class:`._ListProxy` child, which together implement a list whose sublists | |||
reflect changes made to the main list, and vice-versa. | |||
""" | |||
from __future__ import unicode_literals | |||
from sys import maxsize | |||
from weakref import ref | |||
from .compat import py3k | |||
__all__ = ["SmartList"] | |||
def inheritdoc(method): | |||
"""Set __doc__ of *method* to __doc__ of *method* in its parent class. | |||
Since this is used on :class:`.SmartList`, the "parent class" used is | |||
``list``. This function can be used as a decorator. | |||
""" | |||
method.__doc__ = getattr(list, method.__name__).__doc__ | |||
return method | |||
class _SliceNormalizerMixIn(object): | |||
"""MixIn that provides a private method to normalize slices.""" | |||
def _normalize_slice(self, key, clamp=False): | |||
"""Return a slice equivalent to the input *key*, standardized.""" | |||
if key.start is None: | |||
start = 0 | |||
else: | |||
start = (len(self) + key.start) if key.start < 0 else key.start | |||
if key.stop is None or key.stop == maxsize: | |||
stop = len(self) if clamp else None | |||
else: | |||
stop = (len(self) + key.stop) if key.stop < 0 else key.stop | |||
return slice(start, stop, key.step or 1) | |||
class SmartList(_SliceNormalizerMixIn, list): | |||
"""Implements the ``list`` interface with special handling of sublists. | |||
When a sublist is created (by ``list[i:j]``), any changes made to this | |||
list (such as the addition, removal, or replacement of elements) will be | |||
reflected in the sublist, or vice-versa, to the greatest degree possible. | |||
This is implemented by having sublists - instances of the | |||
:class:`._ListProxy` type - dynamically determine their elements by storing | |||
their slice info and retrieving that slice from the parent. Methods that | |||
change the size of the list also change the slice info. For example:: | |||
>>> parent = SmartList([0, 1, 2, 3]) | |||
>>> parent | |||
[0, 1, 2, 3] | |||
>>> child = parent[2:] | |||
>>> child | |||
[2, 3] | |||
>>> child.append(4) | |||
>>> child | |||
[2, 3, 4] | |||
>>> parent | |||
[0, 1, 2, 3, 4] | |||
""" | |||
def __init__(self, iterable=None): | |||
if iterable: | |||
super(SmartList, self).__init__(iterable) | |||
else: | |||
super(SmartList, self).__init__() | |||
self._children = {} | |||
def __getitem__(self, key): | |||
if not isinstance(key, slice): | |||
return super(SmartList, self).__getitem__(key) | |||
key = self._normalize_slice(key, clamp=False) | |||
sliceinfo = [key.start, key.stop, key.step] | |||
child = _ListProxy(self, sliceinfo) | |||
child_ref = ref(child, self._delete_child) | |||
self._children[id(child_ref)] = (child_ref, sliceinfo) | |||
return child | |||
def __setitem__(self, key, item): | |||
if not isinstance(key, slice): | |||
return super(SmartList, self).__setitem__(key, item) | |||
item = list(item) | |||
super(SmartList, self).__setitem__(key, item) | |||
key = self._normalize_slice(key, clamp=True) | |||
diff = len(item) + (key.start - key.stop) // key.step | |||
if not diff: | |||
return | |||
values = self._children.values if py3k else self._children.itervalues | |||
for child, (start, stop, step) in values(): | |||
if start > key.stop: | |||
self._children[id(child)][1][0] += diff | |||
if stop is not None and stop >= key.stop: | |||
self._children[id(child)][1][1] += diff | |||
def __delitem__(self, key): | |||
super(SmartList, self).__delitem__(key) | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key, clamp=True) | |||
else: | |||
key = slice(key, key + 1, 1) | |||
diff = (key.stop - key.start) // key.step | |||
values = self._children.values if py3k else self._children.itervalues | |||
for child, (start, stop, step) in values(): | |||
if start > key.start: | |||
self._children[id(child)][1][0] -= diff | |||
if stop is not None and stop >= key.stop: | |||
self._children[id(child)][1][1] -= diff | |||
if not py3k: | |||
def __getslice__(self, start, stop): | |||
return self.__getitem__(slice(start, stop)) | |||
def __setslice__(self, start, stop, iterable): | |||
self.__setitem__(slice(start, stop), iterable) | |||
def __delslice__(self, start, stop): | |||
self.__delitem__(slice(start, stop)) | |||
def __add__(self, other): | |||
return SmartList(list(self) + other) | |||
def __radd__(self, other): | |||
return SmartList(other + list(self)) | |||
def __iadd__(self, other): | |||
self.extend(other) | |||
return self | |||
def _delete_child(self, child_ref): | |||
"""Remove a child reference that is about to be garbage-collected.""" | |||
del self._children[id(child_ref)] | |||
def _detach_children(self): | |||
"""Remove all children and give them independent parent copies.""" | |||
children = [val[0] for val in self._children.values()] | |||
for child in children: | |||
child()._parent = list(self) | |||
self._children.clear() | |||
@inheritdoc | |||
def append(self, item): | |||
head = len(self) | |||
self[head:head] = [item] | |||
@inheritdoc | |||
def extend(self, item): | |||
head = len(self) | |||
self[head:head] = item | |||
@inheritdoc | |||
def insert(self, index, item): | |||
self[index:index] = [item] | |||
@inheritdoc | |||
def pop(self, index=None): | |||
if index is None: | |||
index = len(self) - 1 | |||
item = self[index] | |||
del self[index] | |||
return item | |||
@inheritdoc | |||
def remove(self, item): | |||
del self[self.index(item)] | |||
@inheritdoc | |||
def reverse(self): | |||
self._detach_children() | |||
super(SmartList, self).reverse() | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
self._detach_children() | |||
kwargs = {} | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
super(SmartList, self).sort(**kwargs) | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
self._detach_children() | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
super(SmartList, self).sort(**kwargs) | |||
class _ListProxy(_SliceNormalizerMixIn, list): | |||
"""Implement the ``list`` interface by getting elements from a parent. | |||
This is created by a :class:`.SmartList` object when slicing. It does not | |||
actually store the list at any time; instead, whenever the list is needed, | |||
it builds it dynamically using the :meth:`_render` method. | |||
""" | |||
def __init__(self, parent, sliceinfo): | |||
super(_ListProxy, self).__init__() | |||
self._parent = parent | |||
self._sliceinfo = sliceinfo | |||
def __repr__(self): | |||
return repr(self._render()) | |||
def __lt__(self, other): | |||
if isinstance(other, _ListProxy): | |||
return self._render() < list(other) | |||
return self._render() < other | |||
def __le__(self, other): | |||
if isinstance(other, _ListProxy): | |||
return self._render() <= list(other) | |||
return self._render() <= other | |||
def __eq__(self, other): | |||
if isinstance(other, _ListProxy): | |||
return self._render() == list(other) | |||
return self._render() == other | |||
def __ne__(self, other): | |||
if isinstance(other, _ListProxy): | |||
return self._render() != list(other) | |||
return self._render() != other | |||
def __gt__(self, other): | |||
if isinstance(other, _ListProxy): | |||
return self._render() > list(other) | |||
return self._render() > other | |||
def __ge__(self, other): | |||
if isinstance(other, _ListProxy): | |||
return self._render() >= list(other) | |||
return self._render() >= other | |||
if py3k: | |||
def __bool__(self): | |||
return bool(self._render()) | |||
else: | |||
def __nonzero__(self): | |||
return bool(self._render()) | |||
def __len__(self): | |||
return max((self._stop - self._start) // self._step, 0) | |||
def __getitem__(self, key): | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key, clamp=True) | |||
keystart = min(self._start + key.start, self._stop) | |||
keystop = min(self._start + key.stop, self._stop) | |||
adjusted = slice(keystart, keystop, key.step) | |||
return self._parent[adjusted] | |||
else: | |||
return self._render()[key] | |||
def __setitem__(self, key, item): | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key, clamp=True) | |||
keystart = min(self._start + key.start, self._stop) | |||
keystop = min(self._start + key.stop, self._stop) | |||
adjusted = slice(keystart, keystop, key.step) | |||
self._parent[adjusted] = item | |||
else: | |||
length = len(self) | |||
if key < 0: | |||
key = length + key | |||
if key < 0 or key >= length: | |||
raise IndexError("list assignment index out of range") | |||
self._parent[self._start + key] = item | |||
def __delitem__(self, key): | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key, clamp=True) | |||
keystart = min(self._start + key.start, self._stop) | |||
keystop = min(self._start + key.stop, self._stop) | |||
adjusted = slice(keystart, keystop, key.step) | |||
del self._parent[adjusted] | |||
else: | |||
length = len(self) | |||
if key < 0: | |||
key = length + key | |||
if key < 0 or key >= length: | |||
raise IndexError("list assignment index out of range") | |||
del self._parent[self._start + key] | |||
def __iter__(self): | |||
i = self._start | |||
while i < self._stop: | |||
yield self._parent[i] | |||
i += self._step | |||
def __reversed__(self): | |||
i = self._stop - 1 | |||
while i >= self._start: | |||
yield self._parent[i] | |||
i -= self._step | |||
def __contains__(self, item): | |||
return item in self._render() | |||
if not py3k: | |||
def __getslice__(self, start, stop): | |||
return self.__getitem__(slice(start, stop)) | |||
def __setslice__(self, start, stop, iterable): | |||
self.__setitem__(slice(start, stop), iterable) | |||
def __delslice__(self, start, stop): | |||
self.__delitem__(slice(start, stop)) | |||
def __add__(self, other): | |||
return SmartList(list(self) + other) | |||
def __radd__(self, other): | |||
return SmartList(other + list(self)) | |||
def __iadd__(self, other): | |||
self.extend(other) | |||
return self | |||
def __mul__(self, other): | |||
return SmartList(list(self) * other) | |||
def __rmul__(self, other): | |||
return SmartList(other * list(self)) | |||
def __imul__(self, other): | |||
self.extend(list(self) * (other - 1)) | |||
return self | |||
@property | |||
def _start(self): | |||
"""The starting index of this list, inclusive.""" | |||
return self._sliceinfo[0] | |||
@property | |||
def _stop(self): | |||
"""The ending index of this list, exclusive.""" | |||
if self._sliceinfo[1] is None: | |||
return len(self._parent) | |||
return self._sliceinfo[1] | |||
@property | |||
def _step(self): | |||
"""The number to increase the index by between items.""" | |||
return self._sliceinfo[2] | |||
def _render(self): | |||
"""Return the actual list from the stored start/stop/step.""" | |||
return list(self._parent)[self._start:self._stop:self._step] | |||
@inheritdoc | |||
def append(self, item): | |||
self._parent.insert(self._stop, item) | |||
@inheritdoc | |||
def count(self, item): | |||
return self._render().count(item) | |||
@inheritdoc | |||
def index(self, item, start=None, stop=None): | |||
if start is not None: | |||
if stop is not None: | |||
return self._render().index(item, start, stop) | |||
return self._render().index(item, start) | |||
return self._render().index(item) | |||
@inheritdoc | |||
def extend(self, item): | |||
self._parent[self._stop:self._stop] = item | |||
@inheritdoc | |||
def insert(self, index, item): | |||
if index < 0: | |||
index = len(self) + index | |||
self._parent.insert(self._start + index, item) | |||
@inheritdoc | |||
def pop(self, index=None): | |||
length = len(self) | |||
if index is None: | |||
index = length - 1 | |||
elif index < 0: | |||
index = length + index | |||
if index < 0 or index >= length: | |||
raise IndexError("pop index out of range") | |||
return self._parent.pop(self._start + index) | |||
@inheritdoc | |||
def remove(self, item): | |||
index = self.index(item) | |||
del self._parent[self._start + index] | |||
@inheritdoc | |||
def reverse(self): | |||
item = self._render() | |||
item.reverse() | |||
self._parent[self._start:self._stop:self._step] = item | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
item = self._render() | |||
kwargs = {} | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
item = self._render() | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
del inheritdoc |
@@ -0,0 +1,233 @@ | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
# SmartList has to be a full import in order to avoid cyclical import errors | |||
import mwparserfromhell.smart_list.SmartList | |||
from .utils import _SliceNormalizerMixIn, inheritdoc | |||
class _ListProxy(_SliceNormalizerMixIn, list):
    """Implement the ``list`` interface by getting elements from a parent.

    This is created by a :class:`.SmartList` object when slicing. It does not
    actually store the list at any time; instead, whenever the list is needed,
    it builds it dynamically using the :meth:`_render` method.
    """

    def __init__(self, parent, sliceinfo):
        super().__init__()
        # *parent* is the backing SmartList; *sliceinfo* is a mutable
        # [start, stop, step] list that the parent updates as it changes size.
        self._parent = parent
        self._sliceinfo = sliceinfo

    def __repr__(self):
        return repr(self._render())

    def __lt__(self, other):
        if isinstance(other, _ListProxy):
            return self._render() < list(other)
        return self._render() < other

    def __le__(self, other):
        if isinstance(other, _ListProxy):
            return self._render() <= list(other)
        return self._render() <= other

    def __eq__(self, other):
        if isinstance(other, _ListProxy):
            return self._render() == list(other)
        return self._render() == other

    def __ne__(self, other):
        if isinstance(other, _ListProxy):
            return self._render() != list(other)
        return self._render() != other

    def __gt__(self, other):
        if isinstance(other, _ListProxy):
            return self._render() > list(other)
        return self._render() > other

    def __ge__(self, other):
        if isinstance(other, _ListProxy):
            return self._render() >= list(other)
        return self._render() >= other

    def __bool__(self):
        return bool(self._render())

    def __len__(self):
        # Ceiling division so a step greater than 1 counts the final partial
        # stride: slice(0, 5, 2) contains three elements (0, 2, 4), but plain
        # floor division (stop - start) // step would report only two.
        # NOTE(review): assumes step >= 1, as produced by _normalize_slice
        # for forward slices — negative steps are not supported here.
        return max((self._stop - self._start + self._step - 1) // self._step, 0)

    def __getitem__(self, key):
        if isinstance(key, slice):
            key = self._normalize_slice(key, clamp=True)
            keystart = min(self._start + key.start, self._stop)
            keystop = min(self._start + key.stop, self._stop)
            # NOTE(review): nested slicing composes offsets assuming our own
            # step is 1; a proxy with step > 1 re-slices parent indices
            # directly — confirm this limitation is acceptable.
            adjusted = slice(keystart, keystop, key.step)
            return self._parent[adjusted]
        else:
            return self._render()[key]

    def __setitem__(self, key, item):
        if isinstance(key, slice):
            key = self._normalize_slice(key, clamp=True)
            keystart = min(self._start + key.start, self._stop)
            keystop = min(self._start + key.stop, self._stop)
            adjusted = slice(keystart, keystop, key.step)
            self._parent[adjusted] = item
        else:
            length = len(self)
            if key < 0:
                key = length + key
            if key < 0 or key >= length:
                raise IndexError("list assignment index out of range")
            # Scale by step: our element *key* lives at parent index
            # start + key * step (not start + key, which assumes step == 1).
            self._parent[self._start + key * self._step] = item

    def __delitem__(self, key):
        if isinstance(key, slice):
            key = self._normalize_slice(key, clamp=True)
            keystart = min(self._start + key.start, self._stop)
            keystop = min(self._start + key.stop, self._stop)
            adjusted = slice(keystart, keystop, key.step)
            del self._parent[adjusted]
        else:
            length = len(self)
            if key < 0:
                key = length + key
            if key < 0 or key >= length:
                raise IndexError("list assignment index out of range")
            # Scale by step, as in __setitem__.
            del self._parent[self._start + key * self._step]

    def __iter__(self):
        i = self._start
        while i < self._stop:
            yield self._parent[i]
            i += self._step

    def __reversed__(self):
        # Start from the last element actually contained in the slice:
        # start + (len - 1) * step. Starting from _stop - 1 would yield
        # parent items outside the slice whenever (stop - start) is not a
        # multiple of step.
        i = self._start + (len(self) - 1) * self._step
        while i >= self._start:
            yield self._parent[i]
            i -= self._step

    def __contains__(self, item):
        return item in self._render()

    def __add__(self, other):
        return mwparserfromhell.smart_list.SmartList(list(self) + other)

    def __radd__(self, other):
        return mwparserfromhell.smart_list.SmartList(other + list(self))

    def __iadd__(self, other):
        self.extend(other)
        return self

    def __mul__(self, other):
        return mwparserfromhell.smart_list.SmartList(list(self) * other)

    def __rmul__(self, other):
        return mwparserfromhell.smart_list.SmartList(other * list(self))

    def __imul__(self, other):
        if other <= 0:
            # list *= n empties the list for n <= 0; mirror that by deleting
            # our slice of the parent. Extending by list(self) * (other - 1)
            # would silently be a no-op for other == 0.
            del self._parent[self._start:self._stop:self._step]
        else:
            self.extend(list(self) * (other - 1))
        return self

    @property
    def _start(self):
        """The starting index of this list, inclusive."""
        return self._sliceinfo[0]

    @property
    def _stop(self):
        """The ending index of this list, exclusive."""
        if self._sliceinfo[1] is None:
            # An open-ended slice tracks the parent's current length.
            return len(self._parent)
        return self._sliceinfo[1]

    @property
    def _step(self):
        """The number to increase the index by between items."""
        return self._sliceinfo[2]

    def _render(self):
        """Return the actual list from the stored start/stop/step."""
        return list(self._parent)[self._start:self._stop:self._step]

    @inheritdoc
    def append(self, item):
        self._parent.insert(self._stop, item)

    @inheritdoc
    def count(self, item):
        return self._render().count(item)

    @inheritdoc
    def index(self, item, start=None, stop=None):
        if start is not None:
            if stop is not None:
                return self._render().index(item, start, stop)
            return self._render().index(item, start)
        return self._render().index(item)

    @inheritdoc
    def extend(self, item):
        self._parent[self._stop:self._stop] = item

    @inheritdoc
    def insert(self, index, item):
        if index < 0:
            index = len(self) + index
        # NOTE(review): assumes step == 1 when mapping the insertion point
        # into the parent; insertion into a strided slice is ambiguous.
        self._parent.insert(self._start + index, item)

    @inheritdoc
    def pop(self, index=None):
        length = len(self)
        if index is None:
            index = length - 1
        elif index < 0:
            index = length + index
        if index < 0 or index >= length:
            raise IndexError("pop index out of range")
        # Scale by step to reach the parent index of our element *index*.
        return self._parent.pop(self._start + index * self._step)

    @inheritdoc
    def remove(self, item):
        index = self.index(item)
        # index() is relative to this slice; scale by step for the parent.
        del self._parent[self._start + index * self._step]

    @inheritdoc
    def reverse(self):
        item = self._render()
        item.reverse()
        self._parent[self._start:self._stop:self._step] = item

    @inheritdoc
    def sort(self, key=None, reverse=None):
        item = self._render()
        kwargs = {}
        if key is not None:
            kwargs["key"] = key
        if reverse is not None:
            kwargs["reverse"] = reverse
        item.sort(**kwargs)
        self._parent[self._start:self._stop:self._step] = item
@@ -0,0 +1,157 @@ | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from _weakref import ref | |||
from .ListProxy import _ListProxy | |||
from .utils import _SliceNormalizerMixIn, inheritdoc | |||
class SmartList(_SliceNormalizerMixIn, list):
    """Implements the ``list`` interface with special handling of sublists.

    When a sublist is created (by ``list[i:j]``), any changes made to this
    list (such as the addition, removal, or replacement of elements) will be
    reflected in the sublist, or vice-versa, to the greatest degree possible.
    This is implemented by having sublists - instances of the
    :class:`._ListProxy` type - dynamically determine their elements by storing
    their slice info and retrieving that slice from the parent. Methods that
    change the size of the list also change the slice info. For example::

        >>> parent = SmartList([0, 1, 2, 3])
        >>> parent
        [0, 1, 2, 3]
        >>> child = parent[2:]
        >>> child
        [2, 3]
        >>> child.append(4)
        >>> child
        [2, 3, 4]
        >>> parent
        [0, 1, 2, 3, 4]
    """

    def __init__(self, iterable=None):
        # Avoid a mutable default argument; an empty/None iterable means an
        # empty list.
        if iterable:
            super().__init__(iterable)
        else:
            super().__init__()
        # Maps id(weakref-to-child) -> (weakref, sliceinfo). The sliceinfo
        # list is the *same object* held by the child proxy, so mutating it
        # here is immediately visible to the child.
        self._children = {}

    def __getitem__(self, key):
        # Integer indexing behaves exactly like a normal list; only slicing
        # produces a live _ListProxy sublist.
        if not isinstance(key, slice):
            return super().__getitem__(key)
        key = self._normalize_slice(key, clamp=False)
        sliceinfo = [key.start, key.stop, key.step]
        child = _ListProxy(self, sliceinfo)
        # Weak reference with a callback so a garbage-collected child is
        # dropped from our bookkeeping automatically.
        child_ref = ref(child, self._delete_child)
        self._children[id(child_ref)] = (child_ref, sliceinfo)
        return child

    def __setitem__(self, key, item):
        if not isinstance(key, slice):
            return super().__setitem__(key, item)
        item = list(item)
        super().__setitem__(key, item)
        key = self._normalize_slice(key, clamp=True)
        # Net change in length: inserted count minus replaced count.
        diff = len(item) + (key.start - key.stop) // key.step
        if not diff:
            return
        # Shift the recorded bounds of affected children. Note that *child*
        # here is the weakref object, whose id() is the dict key, so the
        # indexed lookup below resolves to the same entry.
        for child, (start, stop, step) in self._children.values():
            # NOTE(review): children starting exactly at key.stop are not
            # shifted ('>' rather than '>='); confirm this boundary is
            # intended.
            if start > key.stop:
                self._children[id(child)][1][0] += diff
            if stop is not None and stop >= key.stop:
                self._children[id(child)][1][1] += diff

    def __delitem__(self, key):
        super().__delitem__(key)
        if isinstance(key, slice):
            key = self._normalize_slice(key, clamp=True)
        else:
            # Treat an integer deletion as a one-element slice so both cases
            # share the bookkeeping below.
            key = slice(key, key + 1, 1)
        diff = (key.stop - key.start) // key.step
        # Shift child bounds left to account for the removed elements.
        for child, (start, stop, step) in self._children.values():
            if start > key.start:
                self._children[id(child)][1][0] -= diff
            if stop is not None and stop >= key.stop:
                self._children[id(child)][1][1] -= diff

    def __add__(self, other):
        return SmartList(list(self) + other)

    def __radd__(self, other):
        return SmartList(other + list(self))

    def __iadd__(self, other):
        self.extend(other)
        return self

    def _delete_child(self, child_ref):
        """Remove a child reference that is about to be garbage-collected."""
        del self._children[id(child_ref)]

    def _detach_children(self):
        """Remove all children and give them independent parent copies."""
        # Each child's _parent becomes a plain snapshot copy, so subsequent
        # changes to this list no longer propagate to it (and vice-versa).
        children = [val[0] for val in self._children.values()]
        for child in children:
            child()._parent = list(self)
        self._children.clear()

    @inheritdoc
    def append(self, item):
        # Implemented via slice assignment so child bookkeeping in
        # __setitem__ runs.
        head = len(self)
        self[head:head] = [item]

    @inheritdoc
    def extend(self, item):
        head = len(self)
        self[head:head] = item

    @inheritdoc
    def insert(self, index, item):
        self[index:index] = [item]

    @inheritdoc
    def pop(self, index=None):
        if index is None:
            index = len(self) - 1
        item = self[index]
        del self[index]
        return item

    @inheritdoc
    def remove(self, item):
        del self[self.index(item)]

    @inheritdoc
    def reverse(self):
        # Reordering invalidates the children's slice bookkeeping, so they
        # are detached (frozen as snapshots) first.
        self._detach_children()
        super().reverse()

    @inheritdoc
    def sort(self, key=None, reverse=None):
        self._detach_children()
        # Only forward the arguments the caller actually supplied.
        kwargs = {}
        if key is not None:
            kwargs["key"] = key
        if reverse is not None:
            kwargs["reverse"] = reverse
        super().sort(**kwargs)
@@ -0,0 +1,29 @@ | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
""" | |||
This module contains the :class:`.SmartList` type, as well as its | |||
:class:`._ListProxy` child, which together implement a list whose sublists | |||
reflect changes made to the main list, and vice-versa. | |||
""" | |||
from .SmartList import SmartList |
@@ -0,0 +1,50 @@ | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from sys import maxsize | |||
__all__ = [] | |||
def inheritdoc(method):
    """Set __doc__ of *method* to __doc__ of *method* in its parent class.

    Since this is used on :class:`.SmartList`, the "parent class" used is
    ``list``. This function can be used as a decorator.
    """
    parent_version = getattr(list, method.__name__)
    method.__doc__ = parent_version.__doc__
    return method
class _SliceNormalizerMixIn: | |||
"""MixIn that provides a private method to normalize slices.""" | |||
def _normalize_slice(self, key, clamp=False): | |||
"""Return a slice equivalent to the input *key*, standardized.""" | |||
if key.start is None: | |||
start = 0 | |||
else: | |||
start = (len(self) + key.start) if key.start < 0 else key.start | |||
if key.stop is None or key.stop == maxsize: | |||
stop = len(self) if clamp else None | |||
else: | |||
stop = (len(self) + key.stop) if key.stop < 0 else key.stop | |||
return slice(start, stop, key.step or 1) |
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -22,14 +21,11 @@ | |||
""" | |||
This module contains the :class:`.StringMixIn` type, which implements the | |||
interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. | |||
interface for the ``str`` type in a dynamic manner. | |||
""" | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from .compat import bytes, py26, py3k, str | |||
__all__ = ["StringMixIn"] | |||
def inheritdoc(method): | |||
@@ -41,24 +37,20 @@ def inheritdoc(method): | |||
method.__doc__ = getattr(str, method.__name__).__doc__ | |||
return method | |||
class StringMixIn(object): | |||
class StringMixIn: | |||
"""Implement the interface for ``unicode``/``str`` in a dynamic manner. | |||
To use this class, inherit from it and override the :meth:`__unicode__` | |||
method (same on py3k) to return the string representation of the object. | |||
method to return the string representation of the object. | |||
The various string methods will operate on the value of :meth:`__unicode__` | |||
instead of the immutable ``self`` like the regular ``str`` type. | |||
""" | |||
if py3k: | |||
def __str__(self): | |||
return self.__unicode__() | |||
def __str__(self): | |||
return self.__unicode__() | |||
def __bytes__(self): | |||
return bytes(self.__unicode__(), getdefaultencoding()) | |||
else: | |||
def __str__(self): | |||
return bytes(self.__unicode__()) | |||
def __bytes__(self): | |||
return bytes(self.__unicode__(), getdefaultencoding()) | |||
def __unicode__(self): | |||
raise NotImplementedError() | |||
@@ -84,19 +76,14 @@ class StringMixIn(object): | |||
def __ge__(self, other): | |||
return self.__unicode__() >= other | |||
if py3k: | |||
def __bool__(self): | |||
return bool(self.__unicode__()) | |||
else: | |||
def __nonzero__(self): | |||
return bool(self.__unicode__()) | |||
def __bool__(self): | |||
return bool(self.__unicode__()) | |||
def __len__(self): | |||
return len(self.__unicode__()) | |||
def __iter__(self): | |||
for char in self.__unicode__(): | |||
yield char | |||
yield from self.__unicode__() | |||
def __getitem__(self, key): | |||
return self.__unicode__()[key] | |||
@@ -109,21 +96,11 @@ class StringMixIn(object): | |||
def __getattr__(self, attr): | |||
if not hasattr(str, attr): | |||
raise AttributeError("{0!r} object has no attribute {1!r}".format( | |||
raise AttributeError("{!r} object has no attribute {!r}".format( | |||
type(self).__name__, attr)) | |||
return getattr(self.__unicode__(), attr) | |||
if py3k: | |||
maketrans = str.maketrans # Static method can't rely on __getattr__ | |||
if py26: | |||
@inheritdoc | |||
def encode(self, encoding=None, errors=None): | |||
if encoding is None: | |||
encoding = getdefaultencoding() | |||
if errors is not None: | |||
return self.__unicode__().encode(encoding, errors) | |||
return self.__unicode__().encode(encoding) | |||
maketrans = str.maketrans # Static method can't rely on __getattr__ | |||
del inheritdoc |
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -25,9 +24,7 @@ This module contains accessory functions for other parts of the library. Parser | |||
users generally won't need stuff from here. | |||
""" | |||
from __future__ import unicode_literals | |||
from .compat import bytes, str | |||
from .nodes import Node | |||
from .smart_list import SmartList | |||
@@ -70,5 +67,5 @@ def parse_anything(value, context=0, skip_style_tags=False): | |||
nodelist += parse_anything(item, context, skip_style_tags).nodes | |||
return Wikicode(nodelist) | |||
except TypeError: | |||
error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" | |||
error = "Needs string, Node, Wikicode, file, int, None, or iterable of these, but got {0}: {1}" | |||
raise ValueError(error.format(type(value).__name__, value)) |
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,13 +19,12 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from itertools import chain | |||
import re | |||
from itertools import chain | |||
from .compat import bytes, py3k, range, str | |||
from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, | |||
Node, Tag, Template, Text, Wikilink) | |||
from .smart_list.ListProxy import _ListProxy | |||
from .string_mixin import StringMixIn | |||
from .utils import parse_anything | |||
@@ -47,7 +45,7 @@ class Wikicode(StringMixIn): | |||
RECURSE_OTHERS = 2 | |||
def __init__(self, nodes): | |||
super(Wikicode, self).__init__() | |||
super().__init__() | |||
self._nodes = nodes | |||
def __unicode__(self): | |||
@@ -55,15 +53,14 @@ class Wikicode(StringMixIn): | |||
@staticmethod | |||
def _get_children(node, contexts=False, restrict=None, parent=None): | |||
"""Iterate over all child :class:`.Node`\ s of a given *node*.""" | |||
"""Iterate over all child :class:`.Node`\\ s of a given *node*.""" | |||
yield (parent, node) if contexts else node | |||
if restrict and isinstance(node, restrict): | |||
return | |||
for code in node.__children__(): | |||
for child in code.nodes: | |||
sub = Wikicode._get_children(child, contexts, restrict, code) | |||
for result in sub: | |||
yield result | |||
yield from sub | |||
@staticmethod | |||
def _slice_replace(code, index, old, new): | |||
@@ -108,6 +105,26 @@ class Wikicode(StringMixIn): | |||
if (not forcetype or isinstance(node, forcetype)) and match(node): | |||
yield (i, node) | |||
def _is_child_wikicode(self, obj, recursive=True): | |||
"""Return whether the given :class:`.Wikicode` is a descendant.""" | |||
def deref(nodes): | |||
if isinstance(nodes, _ListProxy): | |||
return nodes._parent # pylint: disable=protected-access | |||
return nodes | |||
target = deref(obj.nodes) | |||
if target is deref(self.nodes): | |||
return True | |||
if recursive: | |||
todo = [self] | |||
while todo: | |||
code = todo.pop() | |||
if target is deref(code.nodes): | |||
return True | |||
for node in code.nodes: | |||
todo += list(node.__children__()) | |||
return False | |||
def _do_strong_search(self, obj, recursive=True): | |||
"""Search for the specific element *obj* within the node list. | |||
@@ -120,11 +137,16 @@ class Wikicode(StringMixIn): | |||
:class:`.Wikicode` contained by a node within ``self``. If *obj* is not | |||
found, :exc:`ValueError` is raised. | |||
""" | |||
if isinstance(obj, Wikicode): | |||
if not self._is_child_wikicode(obj, recursive): | |||
raise ValueError(obj) | |||
return obj, slice(0, len(obj.nodes)) | |||
if isinstance(obj, Node): | |||
mkslice = lambda i: slice(i, i + 1) | |||
if not recursive: | |||
return self, mkslice(self.index(obj)) | |||
for i, node in enumerate(self.nodes): | |||
for node in self.nodes: | |||
for context, child in self._get_children(node, contexts=True): | |||
if obj is child: | |||
if not context: | |||
@@ -132,11 +154,7 @@ class Wikicode(StringMixIn): | |||
return context, mkslice(context.index(child)) | |||
raise ValueError(obj) | |||
context, ind = self._do_strong_search(obj.get(0), recursive) | |||
for i in range(1, len(obj.nodes)): | |||
if obj.get(i) is not context.get(ind.start + i): | |||
raise ValueError(obj) | |||
return context, slice(ind.start, ind.start + len(obj.nodes)) | |||
raise TypeError(obj) | |||
def _do_weak_search(self, obj, recursive): | |||
"""Search for an element that looks like *obj* within the node list. | |||
@@ -230,7 +248,7 @@ class Wikicode(StringMixIn): | |||
self.ifilter(forcetype=ftype, *a, **kw)) | |||
make_filter = lambda ftype: (lambda self, *a, **kw: | |||
self.filter(forcetype=ftype, *a, **kw)) | |||
for name, ftype in (meths.items() if py3k else meths.iteritems()): | |||
for name, ftype in meths.items(): | |||
ifilter = make_ifilter(ftype) | |||
filter = make_filter(ftype) | |||
ifilter.__doc__ = doc.format(name, "ifilter", ftype) | |||
@@ -254,7 +272,7 @@ class Wikicode(StringMixIn): | |||
self._nodes = value | |||
def get(self, index): | |||
"""Return the *index*\ th node within the list of nodes.""" | |||
"""Return the *index*\\ th node within the list of nodes.""" | |||
return self.nodes[index] | |||
def set(self, index, value): | |||
@@ -479,16 +497,16 @@ class Wikicode(StringMixIn): | |||
letter's case is normalized. Typical usage is | |||
``if template.name.matches("stub"): ...``. | |||
""" | |||
cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] | |||
if a and b else a == b) | |||
this = self.strip_code().strip() | |||
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s | |||
this = normalize(self.strip_code().strip()) | |||
if isinstance(other, (str, bytes, Wikicode, Node)): | |||
that = parse_anything(other).strip_code().strip() | |||
return cmp(this, that) | |||
return this == normalize(that) | |||
for obj in other: | |||
that = parse_anything(obj).strip_code().strip() | |||
if cmp(this, that): | |||
if this == normalize(that): | |||
return True | |||
return False | |||
@@ -40,7 +40,6 @@ import sys | |||
import psutil | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
if sys.version_info[0] == 2: | |||
@@ -80,7 +79,7 @@ class MemoryTest(object): | |||
raw = raw.encode("raw_unicode_escape") | |||
data["input"] = raw.decode("unicode_escape") | |||
number = str(counter).zfill(digits) | |||
fname = "test_{0}{1}_{2}".format(name, number, data["name"]) | |||
fname = "test_{}{}_{}".format(name, number, data["name"]) | |||
self._tests.append((fname, data["input"])) | |||
counter += 1 | |||
@@ -88,8 +87,6 @@ class MemoryTest(object): | |||
def load_file(filename): | |||
with open(filename, "rU") as fp: | |||
text = fp.read() | |||
if not py3k: | |||
text = text.decode("utf8") | |||
name = path.split(filename)[1][:0-len(extension)] | |||
self._parse_file(name, text) | |||
@@ -117,7 +114,7 @@ class MemoryTest(object): | |||
tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)" | |||
sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt)) | |||
else: | |||
sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET)) | |||
sys.stdout.write("{}OK{}".format(Color.GREEN, Color.RESET)) | |||
def run(self): | |||
"""Run the memory test suite.""" | |||
@@ -1,7 +1,5 @@ | |||
#! /usr/bin/env bash | |||
set -euo pipefail | |||
if [[ -z "$1" ]]; then | |||
echo "usage: $0 1.2.3" | |||
exit 1 | |||
@@ -77,7 +75,8 @@ do_git_stuff() { | |||
upload_to_pypi() { | |||
echo -n "PyPI: uploading source tarball..." | |||
python setup.py -q register sdist upload -s | |||
python setup.py -q sdist | |||
twine upload -s dist/mwparserfromhell-$VERSION* | |||
echo " done." | |||
} | |||
@@ -85,7 +84,7 @@ post_release() { | |||
echo | |||
echo "*** Release completed." | |||
echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION" | |||
echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell" | |||
echo "*** Verify: https://pypi.org/project/mwparserfromhell" | |||
echo "*** Verify: https://ci.appveyor.com/project/earwig/mwparserfromhell" | |||
echo "*** Verify: https://mwparserfromhell.readthedocs.io" | |||
echo "*** Press enter to sanity-check the release." | |||
@@ -97,7 +96,7 @@ test_release() { | |||
echo "Checking mwparserfromhell v$VERSION..." | |||
echo -n "Creating a virtualenv..." | |||
virtdir="mwparser-test-env" | |||
virtualenv -q $virtdir | |||
python -m venv $virtdir | |||
cd $virtdir | |||
source bin/activate | |||
echo " done." | |||
@@ -105,7 +104,7 @@ test_release() { | |||
pip -q install mwparserfromhell | |||
echo " done." | |||
echo -n "Checking version..." | |||
reported_version=$(python -c 'print __import__("mwparserfromhell").__version__') | |||
reported_version=$(python -c 'print(__import__("mwparserfromhell").__version__)') | |||
if [[ "$reported_version" != "$VERSION" ]]; then | |||
echo " error." | |||
echo "*** ERROR: mwparserfromhell is reporting its version as $reported_version, not $VERSION!" | |||
@@ -134,7 +133,8 @@ test_release() { | |||
rm mwparserfromhell.tar.gz mwparserfromhell.tar.gz.asc | |||
cd mwparserfromhell-$VERSION | |||
echo "Running unit tests..." | |||
python setup.py -q test | |||
python setup.py -q install | |||
python -m unittest discover | |||
if [[ "$?" != "0" ]]; then | |||
echo "*** ERROR: Unit tests failed!" | |||
deactivate | |||
@@ -1,5 +1,4 @@ | |||
#! /usr/bin/env python | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -21,23 +20,17 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function | |||
from distutils.errors import DistutilsError, CCompilerError | |||
from glob import glob | |||
from os import environ | |||
import sys | |||
if ((sys.version_info[0] == 2 and sys.version_info[1] < 6) or | |||
(sys.version_info[1] == 3 and sys.version_info[1] < 2)): | |||
raise RuntimeError("mwparserfromhell needs Python 2.6+ or 3.2+") | |||
from setuptools import setup, find_packages, Extension | |||
from setuptools.command.build_ext import build_ext | |||
from mwparserfromhell import __version__ | |||
from mwparserfromhell.compat import py26, py3k | |||
with open("README.rst", **({'encoding':'utf-8'} if py3k else {})) as fp: | |||
with open("README.rst", encoding='utf-8') as fp: | |||
long_docs = fp.read() | |||
use_extension = True | |||
@@ -76,21 +69,21 @@ if fallback: | |||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||
sources=sorted(glob("mwparserfromhell/parser/ctokenizer/*.c")), | |||
depends=glob("mwparserfromhell/parser/ctokenizer/*.h")) | |||
depends=sorted(glob("mwparserfromhell/parser/ctokenizer/*.h"))) | |||
setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||
ext_modules = [tokenizer] if use_extension else [], | |||
tests_require = ["unittest2"] if py26 else [], | |||
test_suite = "tests.discover", | |||
test_suite = "tests", | |||
version = __version__, | |||
python_requires = ">= 3.4", | |||
author = "Ben Kurtovic", | |||
author_email = "ben.kurtovic@gmail.com", | |||
url = "https://github.com/earwig/mwparserfromhell", | |||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | |||
long_description = long_docs, | |||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__), | |||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__), | |||
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", | |||
license = "MIT License", | |||
classifiers = [ | |||
@@ -99,15 +92,12 @@ setup( | |||
"Intended Audience :: Developers", | |||
"License :: OSI Approved :: MIT License", | |||
"Operating System :: OS Independent", | |||
"Programming Language :: Python :: 2.6", | |||
"Programming Language :: Python :: 2.7", | |||
"Programming Language :: Python :: 3", | |||
"Programming Language :: Python :: 3.2", | |||
"Programming Language :: Python :: 3.3", | |||
"Programming Language :: Python :: 3.4", | |||
"Programming Language :: Python :: 3.5", | |||
"Programming Language :: Python :: 3.6", | |||
"Programming Language :: Python :: 3.7", | |||
"Programming Language :: Python :: 3.8", | |||
"Topic :: Text Processing :: Markup" | |||
], | |||
) |
@@ -1 +0,0 @@ | |||
# -*- coding: utf-8 -*- |
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,12 +19,11 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function, unicode_literals | |||
import codecs | |||
from os import listdir, path | |||
import sys | |||
import warnings | |||
from mwparserfromhell.compat import py3k, str | |||
from mwparserfromhell.parser import tokens | |||
from mwparserfromhell.parser.builder import Builder | |||
@@ -34,7 +32,7 @@ class _TestParseError(Exception): | |||
pass | |||
class TokenizerTestCase(object): | |||
class TokenizerTestCase: | |||
"""A base test case for tokenizers, whose tests are loaded dynamically. | |||
Subclassed along with unittest.TestCase to form TestPyTokenizer and | |||
@@ -59,8 +57,6 @@ class TokenizerTestCase(object): | |||
actual = self.tokenizer().tokenize(data["input"]) | |||
self.assertEqual(expected, actual) | |||
if not py3k: | |||
inner.__name__ = funcname.encode("utf8") | |||
inner.__doc__ = data["label"] | |||
return inner | |||
@@ -98,19 +94,19 @@ class TokenizerTestCase(object): | |||
except _TestParseError as err: | |||
if data["name"]: | |||
error = "Could not parse test '{0}' in '{1}':\n\t{2}" | |||
print(error.format(data["name"], filename, err)) | |||
warnings.warn(error.format(data["name"], filename, err)) | |||
else: | |||
error = "Could not parse a test in '{0}':\n\t{1}" | |||
print(error.format(filename, err)) | |||
warnings.warn(error.format(filename, err)) | |||
continue | |||
if not data["name"]: | |||
error = "A test in '{0}' was ignored because it lacked a name" | |||
print(error.format(filename)) | |||
warnings.warn(error.format(filename)) | |||
continue | |||
if data["input"] is None or data["output"] is None: | |||
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" | |||
print(error.format(data["name"], filename)) | |||
error = "Test '{}' in '{}' was ignored because it lacked an input or an output" | |||
warnings.warn(error.format(data["name"], filename)) | |||
continue | |||
number = str(counter).zfill(digits) | |||
@@ -118,7 +114,7 @@ class TokenizerTestCase(object): | |||
if restrict and data["name"] != restrict: | |||
continue | |||
fname = "test_{0}{1}_{2}".format(name, number, data["name"]) | |||
fname = "test_{}{}_{}".format(name, number, data["name"]) | |||
meth = cls._build_test_method(fname, data) | |||
setattr(cls, fname, meth) | |||
@@ -126,7 +122,7 @@ class TokenizerTestCase(object): | |||
def build(cls): | |||
"""Load and install all tests from the 'tokenizer' directory.""" | |||
def load_file(filename, restrict=None): | |||
with codecs.open(filename, "rU", encoding="utf8") as fp: | |||
with codecs.open(filename, "r", encoding="utf8") as fp: | |||
text = fp.read() | |||
name = path.split(filename)[1][:-len(extension)] | |||
cls._load_tests(filename, name, text, restrict) | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from unittest import TestCase | |||
try: | |||
from unittest2 import TestCase | |||
except ImportError: | |||
from unittest import TestCase | |||
from mwparserfromhell.compat import range | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
@@ -71,7 +64,7 @@ class TreeEqualityTestCase(TestCase): | |||
def assertCommentNodeEqual(self, expected, actual): | |||
"""Assert that two Comment nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.contents, actual.contents) | |||
self.assertEqual(expected.contents, actual.contents) | |||
def assertHeadingNodeEqual(self, expected, actual): | |||
"""Assert that two Heading nodes have the same data.""" | |||
@@ -1,18 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Serves the same purpose as mwparserfromhell.compat, but only for objects | |||
required by unit tests. This avoids unnecessary imports (like urllib) within | |||
the main library. | |||
""" | |||
from mwparserfromhell.compat import py3k | |||
if py3k: | |||
from io import StringIO | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
else: | |||
from StringIO import StringIO | |||
from urllib import urlencode, urlopen |
@@ -1,24 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Discover tests using ``unittest2` for Python 2.6. | |||
It appears the default distutils test suite doesn't play nice with | |||
``setUpClass`` thereby making some tests fail. Using ``unittest2`` to load | |||
tests seems to work around that issue. | |||
http://stackoverflow.com/a/17004409/753501 | |||
""" | |||
import os.path | |||
from mwparserfromhell.compat import py26 | |||
if py26: | |||
import unittest2 as unittest | |||
else: | |||
import unittest | |||
def additional_tests(): | |||
project_root = os.path.split(os.path.dirname(__file__))[0] | |||
return unittest.defaultTestLoader.discover(project_root) |
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Argument, Text | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Template | |||
from mwparserfromhell.nodes.extras import Attribute | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | |||
HTMLEntity, Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
@@ -236,11 +229,11 @@ class TestBuilder(TreeEqualityTestCase): | |||
tests = [ | |||
([tokens.CommentStart(), tokens.Text(text="foobar"), | |||
tokens.CommentEnd()], | |||
wrap([Comment(wraptext("foobar"))])), | |||
wrap([Comment("foobar")])), | |||
([tokens.CommentStart(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.CommentEnd()], | |||
wrap([Comment(wraptext("spam", "eggs"))])), | |||
wrap([Comment("spameggs")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@@ -416,7 +409,7 @@ class TestBuilder(TreeEqualityTestCase): | |||
wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( | |||
wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), | |||
showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( | |||
"g")), Comment(wraptext("h"))])), Template(wraptext("i"), params=[ | |||
"g")), Comment("h")])), Template(wraptext("i"), params=[ | |||
Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", | |||
named=True)]))])]) | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@@ -432,9 +425,8 @@ class TestBuilder(TreeEqualityTestCase): | |||
[tokens.TagOpenOpen()] | |||
] | |||
func = self.assertRaisesRegex if py3k else self.assertRaisesRegexp | |||
msg = r"_handle_token\(\) got unexpected TemplateClose" | |||
func(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) | |||
self.assertRaisesRegex(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) | |||
for test in missing_closes: | |||
self.assertRaises(ParserError, self.builder.build, test) | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Comment | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,12 +19,7 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
try: | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,19 +19,14 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function, unicode_literals | |||
import json | |||
from io import StringIO | |||
import os | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
import mwparserfromhell | |||
from mwparserfromhell.compat import py3k, str | |||
from .compat import StringIO, urlencode, urlopen | |||
class TestDocs(unittest.TestCase): | |||
"""Integration test cases for mwparserfromhell's documentation.""" | |||
@@ -51,16 +45,10 @@ class TestDocs(unittest.TestCase): | |||
self.assertPrint(wikicode, | |||
"I has a template! {{foo|bar|baz|eggs=spam}} See it?") | |||
templates = wikicode.filter_templates() | |||
if py3k: | |||
self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") | |||
else: | |||
self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") | |||
self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") | |||
template = templates[0] | |||
self.assertPrint(template.name, "foo") | |||
if py3k: | |||
self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") | |||
else: | |||
self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") | |||
self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") | |||
self.assertPrint(template.get(1).value, "bar") | |||
self.assertPrint(template.get("eggs").value, "spam") | |||
@@ -68,21 +56,14 @@ class TestDocs(unittest.TestCase): | |||
"""test a block of example code in the README""" | |||
text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
temps = mwparserfromhell.parse(text).filter_templates() | |||
if py3k: | |||
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" | |||
else: | |||
res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" | |||
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" | |||
self.assertPrint(temps, res) | |||
def test_readme_3(self): | |||
"""test a block of example code in the README""" | |||
code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
if py3k: | |||
self.assertPrint(code.filter_templates(recursive=False), | |||
"['{{foo|this {{includes a|template}}}}']") | |||
else: | |||
self.assertPrint(code.filter_templates(recursive=False), | |||
"[u'{{foo|this {{includes a|template}}}}']") | |||
self.assertPrint(code.filter_templates(recursive=False), | |||
"['{{foo|this {{includes a|template}}}}']") | |||
foo = code.filter_templates(recursive=False)[0] | |||
self.assertPrint(foo.get(1).value, "this {{includes a|template}}") | |||
self.assertPrint(foo.get(1).value.filter_templates()[0], | |||
@@ -102,10 +83,7 @@ class TestDocs(unittest.TestCase): | |||
code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||
self.assertPrint(code, res) | |||
if py3k: | |||
res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" | |||
else: | |||
res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" | |||
res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" | |||
self.assertPrint(code.filter_templates(), res) | |||
text = str(code) | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||
@@ -118,17 +96,26 @@ class TestDocs(unittest.TestCase): | |||
url1 = "https://en.wikipedia.org/w/api.php" | |||
url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||
title = "Test" | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
data = { | |||
"action": "query", | |||
"prop": "revisions", | |||
"rvprop": "content", | |||
"rvslots": "main", | |||
"rvlimit": 1, | |||
"titles": title, | |||
"format": "json", | |||
"formatversion": "2", | |||
} | |||
try: | |||
raw = urlopen(url1, urlencode(data).encode("utf8")).read() | |||
except IOError: | |||
except OSError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
res = json.loads(raw.decode("utf8")) | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
revision = res["query"]["pages"][0]["revisions"][0] | |||
text = revision["slots"]["main"]["content"] | |||
try: | |||
expected = urlopen(url2.format(title)).read().decode("utf8") | |||
except IOError: | |||
except OSError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
actual = mwparserfromhell.parse(text) | |||
self.assertEqual(expected, actual) | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import ExternalLink, Text | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Heading, Text | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import HTMLEntity | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Text | |||
from mwparserfromhell.nodes.extras import Parameter | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,15 +19,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell import parser | |||
from mwparserfromhell.compat import range | |||
from mwparserfromhell.nodes import Tag, Template, Text, Wikilink | |||
from mwparserfromhell.nodes.extras import Parameter | |||
@@ -1,6 +1,5 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,13 +19,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.parser import contexts | |||
from mwparserfromhell.parser.tokenizer import Tokenizer | |||
from ._test_tokenizer import TokenizerTestCase | |||
@@ -44,5 +39,10 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): | |||
self.assertFalse(Tokenizer.USES_C) | |||
self.assertFalse(Tokenizer().USES_C) | |||
def test_describe_context(self): | |||
self.assertEqual("", contexts.describe(0)) | |||
ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY|contexts.HAS_TEXT) | |||
self.assertEqual("TEMPLATE_PARAM_KEY|HAS_TEXT", ctx) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,12 +19,7 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from ._test_tokenizer import TokenizerTestCase | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,15 +19,11 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.smart_list.ListProxy import _ListProxy | |||
from mwparserfromhell.compat import py3k, range | |||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||
class TestSmartList(unittest.TestCase): | |||
"""Test cases for the SmartList class and its child, _ListProxy.""" | |||
@@ -130,45 +125,40 @@ class TestSmartList(unittest.TestCase): | |||
list3 = builder([0, 2, 3, 4]) | |||
list4 = builder([0, 1, 2]) | |||
if py3k: | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) | |||
self.assertEqual(b"\x00\x01\x02", bytes(list4)) | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) | |||
else: | |||
self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) | |||
self.assertTrue(list1 < list3) | |||
self.assertTrue(list1 <= list3) | |||
self.assertFalse(list1 == list3) | |||
self.assertTrue(list1 != list3) | |||
self.assertFalse(list1 > list3) | |||
self.assertFalse(list1 >= list3) | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) | |||
self.assertEqual(b"\x00\x01\x02", bytes(list4)) | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) | |||
self.assertLess(list1, list3) | |||
self.assertLessEqual(list1, list3) | |||
self.assertNotEqual(list1, list3) | |||
self.assertNotEqual(list1, list3) | |||
self.assertLessEqual(list1, list3) | |||
self.assertLess(list1, list3) | |||
other1 = [0, 2, 3, 4] | |||
self.assertTrue(list1 < other1) | |||
self.assertTrue(list1 <= other1) | |||
self.assertFalse(list1 == other1) | |||
self.assertTrue(list1 != other1) | |||
self.assertFalse(list1 > other1) | |||
self.assertFalse(list1 >= other1) | |||
self.assertLess(list1, other1) | |||
self.assertLessEqual(list1, other1) | |||
self.assertNotEqual(list1, other1) | |||
self.assertNotEqual(list1, other1) | |||
self.assertLessEqual(list1, other1) | |||
self.assertLess(list1, other1) | |||
other2 = [0, 0, 1, 2] | |||
self.assertFalse(list1 < other2) | |||
self.assertFalse(list1 <= other2) | |||
self.assertFalse(list1 == other2) | |||
self.assertTrue(list1 != other2) | |||
self.assertTrue(list1 > other2) | |||
self.assertTrue(list1 >= other2) | |||
self.assertGreaterEqual(list1, other2) | |||
self.assertGreater(list1, other2) | |||
self.assertNotEqual(list1, other2) | |||
self.assertNotEqual(list1, other2) | |||
self.assertGreater(list1, other2) | |||
self.assertGreaterEqual(list1, other2) | |||
other3 = [0, 1, 2, 3, "one", "two"] | |||
self.assertFalse(list1 < other3) | |||
self.assertTrue(list1 <= other3) | |||
self.assertTrue(list1 == other3) | |||
self.assertFalse(list1 != other3) | |||
self.assertFalse(list1 > other3) | |||
self.assertTrue(list1 >= other3) | |||
self.assertGreaterEqual(list1, other3) | |||
self.assertLessEqual(list1, other3) | |||
self.assertEqual(list1, other3) | |||
self.assertEqual(list1, other3) | |||
self.assertLessEqual(list1, other3) | |||
self.assertGreaterEqual(list1, other3) | |||
self.assertTrue(bool(list1)) | |||
self.assertFalse(bool(list2)) | |||
@@ -198,10 +188,10 @@ class TestSmartList(unittest.TestCase): | |||
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) | |||
self.assertEqual([], list(reversed(list2))) | |||
self.assertTrue("one" in list1) | |||
self.assertTrue(3 in list1) | |||
self.assertFalse(10 in list1) | |||
self.assertFalse(0 in list2) | |||
self.assertIn("one", list1) | |||
self.assertIn(3, list1) | |||
self.assertNotIn(10, list1) | |||
self.assertNotIn(0, list2) | |||
self.assertEqual([], list2 * 5) | |||
self.assertEqual([], 5 * list2) | |||
@@ -265,12 +255,6 @@ class TestSmartList(unittest.TestCase): | |||
self.assertEqual([0, 2, 2, 3, 4, 5], list1) | |||
list1.sort(reverse=True) | |||
self.assertEqual([5, 4, 3, 2, 2, 0], list1) | |||
if not py3k: | |||
func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 | |||
list1.sort(cmp=func) | |||
self.assertEqual([3, 4, 2, 2, 5, 0], list1) | |||
list1.sort(cmp=func, reverse=True) | |||
self.assertEqual([0, 5, 4, 2, 2, 3], list1) | |||
list3.sort(key=lambda i: i[1]) | |||
self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) | |||
list3.sort(key=lambda i: i[1], reverse=True) | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,16 +19,10 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from types import GeneratorType | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import bytes, py3k, py32, range, str | |||
from mwparserfromhell.string_mixin import StringMixIn | |||
class _FakeString(StringMixIn): | |||
@@ -46,20 +39,16 @@ class TestStringMixIn(unittest.TestCase): | |||
def test_docs(self): | |||
"""make sure the various methods of StringMixIn have docstrings""" | |||
methods = [ | |||
"capitalize", "center", "count", "encode", "endswith", | |||
"expandtabs", "find", "format", "index", "isalnum", "isalpha", | |||
"isdecimal", "isdigit", "islower", "isnumeric", "isspace", | |||
"istitle", "isupper", "join", "ljust", "lower", "lstrip", | |||
"partition", "replace", "rfind", "rindex", "rjust", "rpartition", | |||
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip", | |||
"swapcase", "title", "translate", "upper", "zfill"] | |||
if py3k: | |||
if not py32: | |||
methods.append("casefold") | |||
methods.extend(["format_map", "isidentifier", "isprintable", | |||
"maketrans"]) | |||
else: | |||
methods.append("decode") | |||
"capitalize", "casefold", "center", "count", "encode", "endswith", | |||
"expandtabs", "find", "format", "format_map", "index", "isalnum", | |||
"isalpha", "isdecimal", "isdigit", "isidentifier", "islower", | |||
"isnumeric", "isprintable", "isspace", "istitle", "isupper", | |||
"join", "ljust", "lower", "lstrip", "maketrans", "partition", | |||
"replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", | |||
"rstrip", "split", "splitlines", "startswith", "strip", "swapcase", | |||
"title", "translate", "upper", "zfill" | |||
] | |||
for meth in methods: | |||
expected = getattr("foo", meth).__doc__ | |||
actual = getattr(_FakeString("foo"), meth).__doc__ | |||
@@ -70,17 +59,11 @@ class TestStringMixIn(unittest.TestCase): | |||
fstr = _FakeString("fake string") | |||
self.assertEqual(str(fstr), "fake string") | |||
self.assertEqual(bytes(fstr), b"fake string") | |||
if py3k: | |||
self.assertEqual(repr(fstr), "'fake string'") | |||
else: | |||
self.assertEqual(repr(fstr), b"u'fake string'") | |||
self.assertEqual(repr(fstr), "'fake string'") | |||
self.assertIsInstance(str(fstr), str) | |||
self.assertIsInstance(bytes(fstr), bytes) | |||
if py3k: | |||
self.assertIsInstance(repr(fstr), str) | |||
else: | |||
self.assertIsInstance(repr(fstr), bytes) | |||
self.assertIsInstance(repr(fstr), str) | |||
def test_comparisons(self): | |||
"""make sure comparison operators work""" | |||
@@ -90,33 +73,33 @@ class TestStringMixIn(unittest.TestCase): | |||
str4 = "this is a fake string" | |||
str5 = "fake string, this is" | |||
self.assertFalse(str1 > str2) | |||
self.assertTrue(str1 >= str2) | |||
self.assertTrue(str1 == str2) | |||
self.assertFalse(str1 != str2) | |||
self.assertFalse(str1 < str2) | |||
self.assertTrue(str1 <= str2) | |||
self.assertTrue(str1 > str3) | |||
self.assertTrue(str1 >= str3) | |||
self.assertFalse(str1 == str3) | |||
self.assertTrue(str1 != str3) | |||
self.assertFalse(str1 < str3) | |||
self.assertFalse(str1 <= str3) | |||
self.assertFalse(str1 > str4) | |||
self.assertTrue(str1 >= str4) | |||
self.assertTrue(str1 == str4) | |||
self.assertFalse(str1 != str4) | |||
self.assertFalse(str1 < str4) | |||
self.assertTrue(str1 <= str4) | |||
self.assertFalse(str5 > str1) | |||
self.assertFalse(str5 >= str1) | |||
self.assertFalse(str5 == str1) | |||
self.assertTrue(str5 != str1) | |||
self.assertTrue(str5 < str1) | |||
self.assertTrue(str5 <= str1) | |||
self.assertLessEqual(str1, str2) | |||
self.assertGreaterEqual(str1, str2) | |||
self.assertEqual(str1, str2) | |||
self.assertEqual(str1, str2) | |||
self.assertGreaterEqual(str1, str2) | |||
self.assertLessEqual(str1, str2) | |||
self.assertGreater(str1, str3) | |||
self.assertGreaterEqual(str1, str3) | |||
self.assertNotEqual(str1, str3) | |||
self.assertNotEqual(str1, str3) | |||
self.assertGreaterEqual(str1, str3) | |||
self.assertGreater(str1, str3) | |||
self.assertLessEqual(str1, str4) | |||
self.assertGreaterEqual(str1, str4) | |||
self.assertEqual(str1, str4) | |||
self.assertEqual(str1, str4) | |||
self.assertGreaterEqual(str1, str4) | |||
self.assertLessEqual(str1, str4) | |||
self.assertLessEqual(str5, str1) | |||
self.assertLess(str5, str1) | |||
self.assertNotEqual(str5, str1) | |||
self.assertNotEqual(str5, str1) | |||
self.assertLess(str5, str1) | |||
self.assertLessEqual(str5, str1) | |||
def test_other_magics(self): | |||
"""test other magically implemented features, like len() and iter()""" | |||
@@ -161,13 +144,13 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertRaises(IndexError, lambda: str1[11]) | |||
self.assertRaises(IndexError, lambda: str2[0]) | |||
self.assertTrue("k" in str1) | |||
self.assertTrue("fake" in str1) | |||
self.assertTrue("str" in str1) | |||
self.assertTrue("" in str1) | |||
self.assertTrue("" in str2) | |||
self.assertFalse("real" in str1) | |||
self.assertFalse("s" in str2) | |||
self.assertIn("k", str1) | |||
self.assertIn("fake", str1) | |||
self.assertIn("str", str1) | |||
self.assertIn("", str1) | |||
self.assertIn("", str2) | |||
self.assertNotIn("real", str1) | |||
self.assertNotIn("s", str2) | |||
def test_other_methods(self): | |||
"""test the remaining non-magic methods of StringMixIn""" | |||
@@ -185,14 +168,6 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual(1, str1.count("r", 5, 9)) | |||
self.assertEqual(0, str1.count("r", 5, 7)) | |||
if not py3k: | |||
str2 = _FakeString("fo") | |||
self.assertEqual(str1, str1.decode()) | |||
actual = _FakeString("\\U00010332\\U0001033f\\U00010344") | |||
self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) | |||
self.assertRaises(UnicodeError, str2.decode, "punycode") | |||
self.assertEqual("", str2.decode("punycode", "ignore")) | |||
str3 = _FakeString("𐌲𐌿𐍄") | |||
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" | |||
self.assertEqual(b"fake string", str1.encode()) | |||
@@ -239,10 +214,9 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) | |||
self.assertRaises(IndexError, str8.format, "abc") | |||
if py3k: | |||
self.assertEqual("fake string", str1.format_map({})) | |||
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) | |||
self.assertRaises(ValueError, str5.format_map, {0: "abc"}) | |||
self.assertEqual("fake string", str1.format_map({})) | |||
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) | |||
self.assertRaises(ValueError, str5.format_map, {0: "abc"}) | |||
self.assertEqual(3, str1.index("e")) | |||
self.assertRaises(ValueError, str1.index, "z") | |||
@@ -275,11 +249,10 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertFalse(str13.isdigit()) | |||
self.assertTrue(str14.isdigit()) | |||
if py3k: | |||
self.assertTrue(str9.isidentifier()) | |||
self.assertTrue(str10.isidentifier()) | |||
self.assertFalse(str11.isidentifier()) | |||
self.assertFalse(str12.isidentifier()) | |||
self.assertTrue(str9.isidentifier()) | |||
self.assertTrue(str10.isidentifier()) | |||
self.assertFalse(str11.isidentifier()) | |||
self.assertFalse(str12.isidentifier()) | |||
str15 = _FakeString("") | |||
str16 = _FakeString("FooBar") | |||
@@ -292,13 +265,12 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertTrue(str13.isnumeric()) | |||
self.assertTrue(str14.isnumeric()) | |||
if py3k: | |||
str16B = _FakeString("\x01\x02") | |||
self.assertTrue(str9.isprintable()) | |||
self.assertTrue(str13.isprintable()) | |||
self.assertTrue(str14.isprintable()) | |||
self.assertTrue(str15.isprintable()) | |||
self.assertFalse(str16B.isprintable()) | |||
str16B = _FakeString("\x01\x02") | |||
self.assertTrue(str9.isprintable()) | |||
self.assertTrue(str13.isprintable()) | |||
self.assertTrue(str14.isprintable()) | |||
self.assertTrue(str15.isprintable()) | |||
self.assertFalse(str16B.isprintable()) | |||
str17 = _FakeString(" ") | |||
str18 = _FakeString("\t \t \r\n") | |||
@@ -329,10 +301,9 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual("", str15.lower()) | |||
self.assertEqual("foobar", str16.lower()) | |||
self.assertEqual("ß", str22.lower()) | |||
if py3k and not py32: | |||
self.assertEqual("", str15.casefold()) | |||
self.assertEqual("foobar", str16.casefold()) | |||
self.assertEqual("ss", str22.casefold()) | |||
self.assertEqual("", str15.casefold()) | |||
self.assertEqual("foobar", str16.casefold()) | |||
self.assertEqual("ss", str22.casefold()) | |||
str23 = _FakeString(" fake string ") | |||
self.assertEqual("fake string", str1.lstrip()) | |||
@@ -378,9 +349,8 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual(actual, str25.rsplit(None, 3)) | |||
actual = [" this is a sentence with", "", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ", 3)) | |||
if py3k and not py32: | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||
self.assertEqual("fake string", str1.rstrip()) | |||
self.assertEqual(" fake string", str23.rstrip()) | |||
@@ -396,9 +366,8 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual(actual, str25.split(None, 3)) | |||
actual = ["", "", "", "this is a sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(" ", 3)) | |||
if py3k and not py32: | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||
str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") | |||
self.assertEqual(["lines", "of", "text", "are", "presented", "here"], | |||
@@ -417,17 +386,13 @@ class TestStringMixIn(unittest.TestCase): | |||
self.assertEqual("Fake String", str1.title()) | |||
if py3k: | |||
table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", | |||
111: "4", 117: "5"}) | |||
table2 = StringMixIn.maketrans("aeiou", "12345") | |||
table3 = StringMixIn.maketrans("aeiou", "12345", "rts") | |||
self.assertEqual("f1k2 str3ng", str1.translate(table1)) | |||
self.assertEqual("f1k2 str3ng", str1.translate(table2)) | |||
self.assertEqual("f1k2 3ng", str1.translate(table3)) | |||
else: | |||
table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} | |||
self.assertEqual("f1k2 str3ng", str1.translate(table)) | |||
table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", | |||
111: "4", 117: "5"}) | |||
table2 = StringMixIn.maketrans("aeiou", "12345") | |||
table3 = StringMixIn.maketrans("aeiou", "12345", "rts") | |||
self.assertEqual("f1k2 str3ng", str1.translate(table1)) | |||
self.assertEqual("f1k2 str3ng", str1.translate(table2)) | |||
self.assertEqual("f1k2 3ng", str1.translate(table3)) | |||
self.assertEqual("", str15.upper()) | |||
self.assertEqual("FOOBAR", str16.upper()) | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Tag, Template, Text | |||
from mwparserfromhell.nodes.extras import Attribute | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,15 +19,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from difflib import unified_diff | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import HTMLEntity, Template, Text | |||
from mwparserfromhell.nodes.extras import Parameter | |||
from mwparserfromhell import parse | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Text | |||
class TestText(unittest.TestCase): | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser import tokens | |||
class TestTokens(unittest.TestCase): | |||
@@ -68,14 +61,9 @@ class TestTokens(unittest.TestCase): | |||
hundredchars = ("earwig" * 100)[:97] + "..." | |||
self.assertEqual("Token()", repr(token1)) | |||
if py3k: | |||
token2repr1 = "Token(foo='bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo='bar')" | |||
token3repr = "Text(text='" + hundredchars + "')" | |||
else: | |||
token2repr1 = "Token(foo=u'bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo=u'bar')" | |||
token3repr = "Text(text=u'" + hundredchars + "')" | |||
token2repr1 = "Token(foo='bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo='bar')" | |||
token3repr = "Text(text='" + hundredchars + "')" | |||
token2repr = repr(token2) | |||
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) | |||
self.assertEqual(token3repr, repr(token3)) | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,12 +19,7 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
import unittest | |||
from mwparserfromhell.nodes import Template, Text | |||
from mwparserfromhell.utils import parse_anything | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,17 +19,11 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from functools import partial | |||
import re | |||
from types import GeneratorType | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import py3k, str | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Node, Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.smart_list import SmartList | |||
@@ -192,8 +185,8 @@ class TestWikicode(TreeEqualityTestCase): | |||
self.assertRaises(ValueError, func, fake, "q", recursive=True) | |||
func("{{b}}{{c}}", "w", recursive=False) | |||
func("{{d}}{{e}}", "x", recursive=True) | |||
func(wrap(code4.nodes[-2:]), "y", recursive=False) | |||
func(wrap(code4.nodes[-2:]), "z", recursive=True) | |||
func(Wikicode(code4.nodes[-2:]), "y", recursive=False) | |||
func(Wikicode(code4.nodes[-2:]), "z", recursive=True) | |||
self.assertEqual(expected[3], code4) | |||
self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=False) | |||
self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=True) | |||
@@ -222,6 +215,20 @@ class TestWikicode(TreeEqualityTestCase): | |||
func("{{foo}}{{baz}}", "{{lol}}") | |||
self.assertEqual(expected[6], code7) | |||
code8 = parse("== header ==") | |||
func = partial(meth, code8) | |||
sec1, sec2 = code8.get_sections(include_headings=False) | |||
func(sec1, "lead\n") | |||
func(sec2, "\nbody") | |||
self.assertEqual(expected[7], code8) | |||
code9 = parse("{{foo}}") | |||
meth(code9.get_sections()[0], code9.get_sections()[0], "{{bar}}") | |||
meth(code9.get_sections()[0], code9, "{{baz}}") | |||
meth(code9, code9, "{{qux}}") | |||
meth(code9, code9.get_sections()[0], "{{quz}}") | |||
self.assertEqual(expected[8], code9) | |||
def test_insert_before(self): | |||
"""test Wikicode.insert_before()""" | |||
meth = lambda code, *args, **kw: code.insert_before(*args, **kw) | |||
@@ -232,7 +239,10 @@ class TestWikicode(TreeEqualityTestCase): | |||
"{{a}}w{{b}}{{c}}x{{d}}{{e}}{{f}}{{g}}{{h}}yz{{i}}{{j}}", | |||
"{{a|x{{b}}{{c}}|{{f|{{g}}=y{{h}}{{i}}}}}}", | |||
"here cdis {{some abtext and a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}{{lol}}{{foo}}{{baz}}"] | |||
"{{foo}}{{bar}}{{baz}}{{lol}}{{foo}}{{baz}}", | |||
"lead\n== header ==\nbody", | |||
"{{quz}}{{qux}}{{baz}}{{bar}}{{foo}}", | |||
] | |||
self._test_search(meth, expected) | |||
def test_insert_after(self): | |||
@@ -245,16 +255,26 @@ class TestWikicode(TreeEqualityTestCase): | |||
"{{a}}{{b}}{{c}}w{{d}}{{e}}x{{f}}{{g}}{{h}}{{i}}{{j}}yz", | |||
"{{a|{{b}}{{c}}x|{{f|{{g}}={{h}}{{i}}y}}}}", | |||
"here is {{somecd text andab a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}{{lol}}"] | |||
"{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}{{lol}}", | |||
"lead\n== header ==\nbody", | |||
"{{foo}}{{bar}}{{baz}}{{qux}}{{quz}}", | |||
] | |||
self._test_search(meth, expected) | |||
def test_replace(self): | |||
"""test Wikicode.replace()""" | |||
meth = lambda code, *args, **kw: code.replace(*args, **kw) | |||
expected = [ | |||
"{{a}}xz[[y]]{{e}}", "dcdffe", "{{a|x|{{c|d=y}}}}", | |||
"{{a}}wx{{f}}{{g}}z", "{{a|x|{{f|{{g}}=y}}}}", | |||
"here cd ab a {{template}}}}", "{{foo}}{{bar}}{{baz}}{{lol}}"] | |||
"{{a}}xz[[y]]{{e}}", | |||
"dcdffe", | |||
"{{a|x|{{c|d=y}}}}", | |||
"{{a}}wx{{f}}{{g}}z", | |||
"{{a|x|{{f|{{g}}=y}}}}", | |||
"here cd ab a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}{{lol}}", | |||
"lead\n== header ==\nbody", | |||
"{{quz}}", | |||
] | |||
self._test_search(meth, expected) | |||
def test_append(self): | |||
@@ -273,16 +293,25 @@ class TestWikicode(TreeEqualityTestCase): | |||
"""test Wikicode.remove()""" | |||
meth = lambda code, obj, value, **kw: code.remove(obj, **kw) | |||
expected = [ | |||
"{{a}}{{c}}", "", "{{a||{{c|d=}}}}", "{{a}}{{f}}", | |||
"{{a||{{f|{{g}}=}}}}", "here a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}"] | |||
"{{a}}{{c}}", | |||
"", | |||
"{{a||{{c|d=}}}}", | |||
"{{a}}{{f}}", | |||
"{{a||{{f|{{g}}=}}}}", | |||
"here a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}", | |||
"== header ==", | |||
"", | |||
] | |||
self._test_search(meth, expected) | |||
def test_matches(self): | |||
"""test Wikicode.matches()""" | |||
code1 = parse("Cleanup") | |||
code2 = parse("\nstub<!-- TODO: make more specific -->") | |||
code3 = parse("") | |||
code3 = parse("Hello world!") | |||
code4 = parse("World,_hello?") | |||
code5 = parse("") | |||
self.assertTrue(code1.matches("Cleanup")) | |||
self.assertTrue(code1.matches("cleanup")) | |||
self.assertTrue(code1.matches(" cleanup\n")) | |||
@@ -297,9 +326,15 @@ class TestWikicode(TreeEqualityTestCase): | |||
self.assertFalse(code2.matches(["StuB", "sTUb", "foobar"])) | |||
self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub"))) | |||
self.assertTrue(code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"])) | |||
self.assertTrue(code3.matches("")) | |||
self.assertTrue(code3.matches("<!-- nothing -->")) | |||
self.assertTrue(code3.matches(("a", "b", ""))) | |||
self.assertTrue(code3.matches("hello world!")) | |||
self.assertTrue(code3.matches("hello_world!")) | |||
self.assertFalse(code3.matches("hello__world!")) | |||
self.assertTrue(code4.matches("World,_hello?")) | |||
self.assertTrue(code4.matches("World, hello?")) | |||
self.assertFalse(code4.matches("World, hello?")) | |||
self.assertTrue(code5.matches("")) | |||
self.assertTrue(code5.matches("<!-- nothing -->")) | |||
self.assertTrue(code5.matches(("a", "b", ""))) | |||
def test_filter_family(self): | |||
"""test the Wikicode.i?filter() family of functions""" | |||
@@ -1,4 +1,3 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
@@ -20,14 +19,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
import unittest2 as unittest | |||
except ImportError: | |||
import unittest | |||
from mwparserfromhell.compat import str | |||
from mwparserfromhell.nodes import Text, Wikilink | |||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||
@@ -353,3 +353,10 @@ name: many_invalid_nested_tags | |||
label: many unending nested tags that should be treated as plain text, followed by valid wikitext (see issues #42, #183) | |||
input: "<b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b>[[{{x}}" | |||
output: [Text(text="<b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b><b>[["), TemplateOpen(), Text(text="x"), TemplateClose()] | |||
--- | |||
name: nested_templates_and_style_tags | |||
label: many nested templates and style tags, testing edge case behavior and error recovery near the recursion depth limit (see issue #224) | |||
input: "{{a|'''}}{{b|1='''c''}}{{d|1='''e''}}{{f|1='''g''}}{{h|1='''i''}}{{j|1='''k''}}{{l|1='''m''}}{{n|1='''o''}}{{p|1='''q''}}{{r|1=''s'''}}{{t|1='''u''}}{{v|1='''w''x'''y'''}}\n{|\n|-\n|'''\n|}" | |||
output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="'''"), TemplateClose(), TemplateOpen(), Text(text="b"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="c"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="d"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="e"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="f"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="g"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="h"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="i"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="j"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="k"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="m"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="o''}}"), TemplateOpen(), Text(text="p"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), 
TagCloseOpen(), Text(text="q"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), TemplateOpen(), Text(text="r"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="''s'''"), TemplateClose(), TemplateOpen(), Text(text="t"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="'"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="u"), TagOpenClose(), Text(text="i"), TagCloseClose(), TemplateClose(), Text(text="{{v|1="), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="w''x"), TagOpenOpen(wiki_markup="'''"), Text(text="b"), TagCloseOpen(), Text(text="y"), TagOpenClose(), Text(text="b"), TagCloseClose(), TemplateClose(), Text(text="\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="'''\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] |
@@ -408,3 +408,17 @@ name: junk_after_table_row | |||
label: ignore junk on the first line of a table row | |||
input: "{|\n|- foo="bar" | baz\n|blerp\n|}" | |||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="bar"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="baz"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="blerp\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||
--- | |||
name: incomplete_nested_open_only | |||
label: many nested incomplete tables: table open only | |||
input: "{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|" | |||
output: [Text(text="{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|")] | |||
--- | |||
name: incomplete_nested_open_and_row | |||
label: many nested incomplete tables: table open and row separator (see issue #206) | |||
input: "{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-" | |||
output: [Text(text="{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-")] |