@@ -0,0 +1,9 @@ | |||||
[report] | |||||
exclude_lines = | |||||
pragma: no cover | |||||
raise NotImplementedError() | |||||
partial_branches = | |||||
pragma: no branch | |||||
if py3k: | |||||
if not py3k: | |||||
if py26: |
@@ -1,10 +1,14 @@ | |||||
*.pyc | *.pyc | ||||
*.pyd | |||||
*.so | *.so | ||||
*.dll | *.dll | ||||
*.egg | *.egg | ||||
*.egg-info | *.egg-info | ||||
.coverage | |||||
.DS_Store | .DS_Store | ||||
__pycache__ | __pycache__ | ||||
build | build | ||||
dist | dist | ||||
docs/_build | docs/_build | ||||
scripts/*.log | |||||
htmlcov/ |
@@ -1,8 +1,18 @@ | |||||
language: python | language: python | ||||
python: | python: | ||||
- "2.6" | |||||
- "2.7" | |||||
- "3.2" | |||||
- "3.3" | |||||
install: python setup.py build | |||||
script: python setup.py test -q | |||||
- 2.6 | |||||
- 2.7 | |||||
- 3.2 | |||||
- 3.3 | |||||
- 3.4 | |||||
install: | |||||
- pip install coveralls | |||||
- python setup.py build | |||||
script: | |||||
- coverage run --source=mwparserfromhell setup.py -q test | |||||
after_success: | |||||
- coveralls | |||||
env: | |||||
matrix: | |||||
- WITHOUT_EXTENSION=0 | |||||
- WITHOUT_EXTENSION=1 |
@@ -1,3 +1,41 @@ | |||||
v0.4 (unreleased): | |||||
- The parser now falls back on pure Python mode if C extensions cannot be | |||||
built. This fixes an issue that prevented some Windows users from installing | |||||
the parser. | |||||
- Added support for parsing wikicode tables (patches by David Winegar). | |||||
- Added a script to test for memory leaks in scripts/memtest.py. | |||||
- Added a script to do releases in scripts/release.sh. | |||||
- skip_style_tags can now be passed to mwparserfromhell.parse() (previously, | |||||
only Parser().parse() allowed it). | |||||
- The 'recursive' argument to Wikicode's filter methods now accepts a third | |||||
option, RECURSE_OTHERS, which recurses over all children except instances of | |||||
'forcetype' (for example, `code.filter_templates(code.RECURSE_OTHERS)` | |||||
returns all un-nested templates). | |||||
- The parser now understands HTML tag attributes quoted with single quotes. | |||||
When setting a tag attribute's value, quotes will be added if necessary. As | |||||
part of this, Attribute's 'quoted' attribute has been changed to 'quotes', | |||||
and is now either a string or None. | |||||
- Calling Template.remove() with a Parameter object that is not part of the | |||||
template now raises ValueError instead of doing nothing. | |||||
- Parameters with non-integer keys can no longer be created with | |||||
'showkey=False', nor have the value of this attribute be set to False later. | |||||
- _ListProxy.destroy() has been changed to _ListProxy.detach(), and now works | |||||
in a more useful way. | |||||
- If something goes wrong while parsing, ParserError will now be raised. | |||||
Previously, the parser would produce an unclear BadRoute exception or allow | |||||
an incorrect node tree to be built. | |||||
- Fixed parser bugs involving: | |||||
- nested tags; | |||||
- comments in template names; | |||||
- tags inside of <nowiki> tags. | |||||
- Added tests to ensure that parsed trees convert back to wikicode without | |||||
unintentional modifications. | |||||
- Added support for a NOWEB environment variable, which disables a unit test | |||||
that makes a web call. | |||||
- Test coverage has been improved, and some minor related bugs have been fixed. | |||||
- Updated and fixed some documentation. | |||||
v0.3.3 (released April 22, 2014): | v0.3.3 (released April 22, 2014): | ||||
- Added support for Python 2.6 and 3.4. | - Added support for Python 2.6 and 3.4. | ||||
@@ -1,4 +1,4 @@ | |||||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
of this software and associated documentation files (the "Software"), to deal | of this software and associated documentation files (the "Software"), to deal | ||||
@@ -0,0 +1,2 @@ | |||||
include LICENSE CHANGELOG | |||||
recursive-include tests *.py *.mwtest |
@@ -1,10 +1,14 @@ | |||||
mwparserfromhell | mwparserfromhell | ||||
================ | ================ | ||||
.. image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop | |||||
.. image:: https://img.shields.io/travis/earwig/mwparserfromhell/develop.svg | |||||
:alt: Build Status | :alt: Build Status | ||||
:target: http://travis-ci.org/earwig/mwparserfromhell | :target: http://travis-ci.org/earwig/mwparserfromhell | ||||
.. image:: https://img.shields.io/coveralls/earwig/mwparserfromhell/develop.svg | |||||
:alt: Coverage Status | |||||
:target: https://coveralls.io/r/earwig/mwparserfromhell | |||||
**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package | **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package | ||||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | ||||
wikicode. It supports Python 2 and Python 3. | wikicode. It supports Python 2 and Python 3. | ||||
@@ -15,19 +19,17 @@ Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. | |||||
Installation | Installation | ||||
------------ | ------------ | ||||
The easiest way to install the parser is through the `Python Package Index`_, | |||||
so you can install the latest release with ``pip install mwparserfromhell`` | |||||
(`get pip`_). Alternatively, get the latest development version:: | |||||
The easiest way to install the parser is through the `Python Package Index`_; | |||||
you can install the latest release with ``pip install mwparserfromhell`` | |||||
(`get pip`_). On Windows, make sure you have the latest version of pip | |||||
installed by running ``pip install --upgrade pip``. | |||||
Alternatively, get the latest development version:: | |||||
git clone https://github.com/earwig/mwparserfromhell.git | git clone https://github.com/earwig/mwparserfromhell.git | ||||
cd mwparserfromhell | cd mwparserfromhell | ||||
python setup.py install | python setup.py install | ||||
If you get ``error: Unable to find vcvarsall.bat`` while installing, this is | |||||
because Windows can't find the compiler for C extensions. Consult this | |||||
`StackOverflow question`_ for help. You can also set ``ext_modules`` in | |||||
``setup.py`` to an empty list to prevent the extension from building. | |||||
You can run the comprehensive unit testing suite with | You can run the comprehensive unit testing suite with | ||||
``python setup.py test -q``. | ``python setup.py test -q``. | ||||
@@ -40,24 +42,24 @@ Normal usage is rather straightforward (where ``text`` is page text):: | |||||
>>> wikicode = mwparserfromhell.parse(text) | >>> wikicode = mwparserfromhell.parse(text) | ||||
``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an | ``wikicode`` is a ``mwparserfromhell.Wikicode`` object, which acts like an | ||||
ordinary ``unicode`` object (or ``str`` in Python 3) with some extra methods. | |||||
ordinary ``str`` object (or ``unicode`` in Python 2) with some extra methods. | |||||
For example:: | For example:: | ||||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | ||||
>>> wikicode = mwparserfromhell.parse(text) | >>> wikicode = mwparserfromhell.parse(text) | ||||
>>> print wikicode | |||||
>>> print(wikicode) | |||||
I has a template! {{foo|bar|baz|eggs=spam}} See it? | I has a template! {{foo|bar|baz|eggs=spam}} See it? | ||||
>>> templates = wikicode.filter_templates() | >>> templates = wikicode.filter_templates() | ||||
>>> print templates | |||||
>>> print(templates) | |||||
['{{foo|bar|baz|eggs=spam}}'] | ['{{foo|bar|baz|eggs=spam}}'] | ||||
>>> template = templates[0] | >>> template = templates[0] | ||||
>>> print template.name | |||||
>>> print(template.name) | |||||
foo | foo | ||||
>>> print template.params | |||||
>>> print(template.params) | |||||
['bar', 'baz', 'eggs=spam'] | ['bar', 'baz', 'eggs=spam'] | ||||
>>> print template.get(1).value | |||||
>>> print(template.get(1).value) | |||||
bar | bar | ||||
>>> print template.get("eggs").value | |||||
>>> print(template.get("eggs").value) | |||||
spam | spam | ||||
Since nodes can contain other nodes, getting nested templates is trivial:: | Since nodes can contain other nodes, getting nested templates is trivial:: | ||||
@@ -71,14 +73,14 @@ templates manually. This is possible because nodes can contain additional | |||||
``Wikicode`` objects:: | ``Wikicode`` objects:: | ||||
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | ||||
>>> print code.filter_templates(recursive=False) | |||||
>>> print(code.filter_templates(recursive=False)) | |||||
['{{foo|this {{includes a|template}}}}'] | ['{{foo|this {{includes a|template}}}}'] | ||||
>>> foo = code.filter_templates(recursive=False)[0] | >>> foo = code.filter_templates(recursive=False)[0] | ||||
>>> print foo.get(1).value | |||||
>>> print(foo.get(1).value) | |||||
this {{includes a|template}} | this {{includes a|template}} | ||||
>>> print foo.get(1).value.filter_templates()[0] | |||||
>>> print(foo.get(1).value.filter_templates()[0]) | |||||
{{includes a|template}} | {{includes a|template}} | ||||
>>> print foo.get(1).value.filter_templates()[0].get(1).value | |||||
>>> print(foo.get(1).value.filter_templates()[0].get(1).value) | |||||
template | template | ||||
Templates can be easily modified to add, remove, or alter params. ``Wikicode`` | Templates can be easily modified to add, remove, or alter params. ``Wikicode`` | ||||
@@ -93,24 +95,24 @@ whitespace:: | |||||
... if template.name.matches("Cleanup") and not template.has("date"): | ... if template.name.matches("Cleanup") and not template.has("date"): | ||||
... template.add("date", "July 2012") | ... template.add("date", "July 2012") | ||||
... | ... | ||||
>>> print code | |||||
>>> print(code) | |||||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | ||||
>>> code.replace("{{uncategorized}}", "{{bar-stub}}") | >>> code.replace("{{uncategorized}}", "{{bar-stub}}") | ||||
>>> print code | |||||
>>> print(code) | |||||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | ||||
>>> print code.filter_templates() | |||||
>>> print(code.filter_templates()) | |||||
['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | ||||
You can then convert ``code`` back into a regular ``unicode`` object (for | |||||
saving the page!) by calling ``unicode()`` on it:: | |||||
You can then convert ``code`` back into a regular ``str`` object (for | |||||
saving the page!) by calling ``str()`` on it:: | |||||
>>> text = unicode(code) | |||||
>>> print text | |||||
>>> text = str(code) | |||||
>>> print(text) | |||||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | ||||
>>> text == code | >>> text == code | ||||
True | True | ||||
Likewise, use ``str(code)`` in Python 3. | |||||
Likewise, use ``unicode(code)`` in Python 2. | |||||
Integration | Integration | ||||
----------- | ----------- | ||||
@@ -119,27 +121,30 @@ Integration | |||||
``Page`` objects have a ``parse`` method that essentially calls | ``Page`` objects have a ``parse`` method that essentially calls | ||||
``mwparserfromhell.parse()`` on ``page.get()``. | ``mwparserfromhell.parse()`` on ``page.get()``. | ||||
If you're using Pywikipedia_, your code might look like this:: | |||||
If you're using Pywikibot_, your code might look like this:: | |||||
import mwparserfromhell | import mwparserfromhell | ||||
import wikipedia as pywikibot | |||||
import pywikibot | |||||
def parse(title): | def parse(title): | ||||
site = pywikibot.getSite() | |||||
site = pywikibot.Site() | |||||
page = pywikibot.Page(site, title) | page = pywikibot.Page(site, title) | ||||
text = page.get() | text = page.get() | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
If you're not using a library, you can parse templates in any page using the | |||||
following code (via the API_):: | |||||
If you're not using a library, you can parse any page using the following code | |||||
(via the API_):: | |||||
import json | import json | ||||
import urllib | |||||
from urllib.parse import urlencode | |||||
from urllib.request import urlopen | |||||
import mwparserfromhell | import mwparserfromhell | ||||
API_URL = "http://en.wikipedia.org/w/api.php" | API_URL = "http://en.wikipedia.org/w/api.php" | ||||
def parse(title): | def parse(title): | ||||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | data = {"action": "query", "prop": "revisions", "rvlimit": 1, | ||||
"rvprop": "content", "format": "json", "titles": title} | "rvprop": "content", "format": "json", "titles": title} | ||||
raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() | |||||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||||
res = json.loads(raw) | res = json.loads(raw) | ||||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
@@ -154,5 +159,5 @@ following code (via the API_):: | |||||
.. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat | .. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat | ||||
.. _get pip: http://pypi.python.org/pypi/pip | .. _get pip: http://pypi.python.org/pypi/pip | ||||
.. _EarwigBot: https://github.com/earwig/earwigbot | .. _EarwigBot: https://github.com/earwig/earwigbot | ||||
.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot | |||||
.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot | |||||
.. _API: http://mediawiki.org/wiki/API | .. _API: http://mediawiki.org/wiki/API |
@@ -87,4 +87,3 @@ Subpackages | |||||
.. toctree:: | .. toctree:: | ||||
mwparserfromhell.nodes.extras | mwparserfromhell.nodes.extras | ||||
@@ -15,6 +15,12 @@ mwparserfromhell Package | |||||
:members: | :members: | ||||
:undoc-members: | :undoc-members: | ||||
:mod:`definitions` Module | |||||
------------------------- | |||||
.. automodule:: mwparserfromhell.definitions | |||||
:members: | |||||
:mod:`smart_list` Module | :mod:`smart_list` Module | ||||
------------------------ | ------------------------ | ||||
@@ -30,12 +36,6 @@ mwparserfromhell Package | |||||
:members: | :members: | ||||
:undoc-members: | :undoc-members: | ||||
:mod:`definitions` Module | |||||
.. automodule:: mwparserfromhell.definitions | |||||
:members: | |||||
:mod:`utils` Module | :mod:`utils` Module | ||||
------------------- | ------------------- | ||||
@@ -1,6 +1,54 @@ | |||||
Changelog | Changelog | ||||
========= | ========= | ||||
v0.4 | |||||
---- | |||||
Unreleased | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.3...develop>`__): | |||||
- The parser now falls back on pure Python mode if C extensions cannot be | |||||
built. This fixes an issue that prevented some Windows users from installing | |||||
the parser. | |||||
- Added support for parsing wikicode tables (patches by David Winegar). | |||||
- Added a script to test for memory leaks in :file:`scripts/memtest.py`. | |||||
- Added a script to do releases in :file:`scripts/release.sh`. | |||||
- *skip_style_tags* can now be passed to :func:`mwparserfromhell.parse() | |||||
<.parse_anything>` (previously, only :meth:`.Parser.parse` allowed it). | |||||
- The *recursive* argument to :class:`Wikicode's <.Wikicode>` :meth:`.filter` | |||||
methods now accepts a third option, ``RECURSE_OTHERS``, which recurses over | |||||
all children except instances of *forcetype* (for example, | |||||
``code.filter_templates(code.RECURSE_OTHERS)`` returns all un-nested | |||||
templates). | |||||
- The parser now understands HTML tag attributes quoted with single quotes. | |||||
When setting a tag attribute's value, quotes will be added if necessary. As | |||||
part of this, :class:`.Attribute`\ 's :attr:`~.Attribute.quoted` attribute | |||||
has been changed to :attr:`~.Attribute.quotes`, and is now either a string or | |||||
``None``. | |||||
- Calling :meth:`.Template.remove` with a :class:`.Parameter` object that is | |||||
not part of the template now raises :exc:`ValueError` instead of doing | |||||
nothing. | |||||
- :class:`.Parameter`\ s with non-integer keys can no longer be created with | |||||
*showkey=False*, nor have the value of this attribute be set to *False* | |||||
later. | |||||
- :meth:`._ListProxy.destroy` has been changed to :meth:`._ListProxy.detach`, | |||||
and now works in a more useful way. | |||||
- If something goes wrong while parsing, :exc:`.ParserError` will now be | |||||
raised. Previously, the parser would produce an unclear :exc:`.BadRoute` | |||||
exception or allow an incorrect node tree to be built. | |||||
- Fixed parser bugs involving: | |||||
- nested tags; | |||||
- comments in template names; | |||||
- tags inside of ``<nowiki>`` tags. | |||||
- Added tests to ensure that parsed trees convert back to wikicode without | |||||
unintentional modifications. | |||||
- Added support for a :envvar:`NOWEB` environment variable, which disables a | |||||
unit test that makes a web call. | |||||
- Test coverage has been improved, and some minor related bugs have been fixed. | |||||
- Updated and fixed some documentation. | |||||
v0.3.3 | v0.3.3 | ||||
------ | ------ | ||||
@@ -8,22 +56,21 @@ v0.3.3 | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.2...v0.3.3>`__): | (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.2...v0.3.3>`__): | ||||
- Added support for Python 2.6 and 3.4. | - Added support for Python 2.6 and 3.4. | ||||
- :py:meth:`.Template.has` is now passed *ignore_empty=False* by default | |||||
- :meth:`.Template.has` is now passed *ignore_empty=False* by default | |||||
instead of *True*. This fixes a bug when adding parameters to templates with | instead of *True*. This fixes a bug when adding parameters to templates with | ||||
empty fields, **and is a breaking change if you rely on the default | empty fields, **and is a breaking change if you rely on the default | ||||
behavior.** | behavior.** | ||||
- The *matches* argument of :py:class:`Wikicode's <.Wikicode>` | |||||
:py:meth:`.filter` methods now accepts a function (taking one argument, a | |||||
:py:class:`.Node`, and returning a bool) in addition to a regex. | |||||
- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`, fixed the | |||||
order in which it returns sections, and made it faster. | |||||
- :py:meth:`.Wikicode.matches` now accepts a tuple or list of | |||||
strings/:py:class:`.Wikicode` objects instead of just a single string or | |||||
:py:class:`.Wikicode`. | |||||
- The *matches* argument of :class:`Wikicode's <.Wikicode>` :meth:`.filter` | |||||
methods now accepts a function (taking one argument, a :class:`.Node`, and | |||||
returning a bool) in addition to a regex. | |||||
- Re-added *flat* argument to :meth:`.Wikicode.get_sections`, fixed the order | |||||
in which it returns sections, and made it faster. | |||||
- :meth:`.Wikicode.matches` now accepts a tuple or list of | |||||
strings/:class:`.Wikicode` objects instead of just a single string or | |||||
:class:`.Wikicode`. | |||||
- Given the frequency of issues with the (admittedly insufficient) tag parser, | - Given the frequency of issues with the (admittedly insufficient) tag parser, | ||||
there's a temporary *skip_style_tags* argument to | |||||
:py:meth:`~.Parser.parse` that ignores ``''`` and ``'''`` until these issues | |||||
are corrected. | |||||
there's a temporary *skip_style_tags* argument to :meth:`~.Parser.parse` that | |||||
ignores ``''`` and ``'''`` until these issues are corrected. | |||||
- Fixed a parser bug involving nested wikilinks and external links. | - Fixed a parser bug involving nested wikilinks and external links. | ||||
- C code cleanup and speed improvements. | - C code cleanup and speed improvements. | ||||
@@ -34,9 +81,9 @@ v0.3.2 | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.1...v0.3.2>`__): | (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.1...v0.3.2>`__): | ||||
- Added support for Python 3.2 (along with current support for 3.3 and 2.7). | - Added support for Python 3.2 (along with current support for 3.3 and 2.7). | ||||
- Renamed :py:meth:`.Template.remove`\ 's first argument from *name* to | |||||
*param*, which now accepts :py:class:`.Parameter` objects in addition to | |||||
parameter name strings. | |||||
- Renamed :meth:`.Template.remove`\ 's first argument from *name* to *param*, | |||||
which now accepts :class:`.Parameter` objects in addition to parameter name | |||||
strings. | |||||
v0.3.1 | v0.3.1 | ||||
------ | ------ | ||||
@@ -53,24 +100,24 @@ v0.3 | |||||
`Released August 24, 2013 <https://github.com/earwig/mwparserfromhell/tree/v0.3>`_ | `Released August 24, 2013 <https://github.com/earwig/mwparserfromhell/tree/v0.3>`_ | ||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.2...v0.3>`__): | (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.2...v0.3>`__): | ||||
- Added complete support for HTML :py:class:`Tags <.Tag>`, including forms like | |||||
- Added complete support for HTML :class:`Tags <.Tag>`, including forms like | |||||
``<ref>foo</ref>``, ``<ref name="bar"/>``, and wiki-markup tags like bold | ``<ref>foo</ref>``, ``<ref name="bar"/>``, and wiki-markup tags like bold | ||||
(``'''``), italics (``''``), and lists (``*``, ``#``, ``;`` and ``:``). | (``'''``), italics (``''``), and lists (``*``, ``#``, ``;`` and ``:``). | ||||
- Added support for :py:class:`.ExternalLink`\ s (``http://example.com/`` and | |||||
- Added support for :class:`.ExternalLink`\ s (``http://example.com/`` and | |||||
``[http://example.com/ Example]``). | ``[http://example.com/ Example]``). | ||||
- :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods are now passed | |||||
- :class:`Wikicode's <.Wikicode>` :meth:`.filter` methods are now passed | |||||
*recursive=True* by default instead of *False*. **This is a breaking change | *recursive=True* by default instead of *False*. **This is a breaking change | ||||
if you rely on any filter() methods being non-recursive by default.** | if you rely on any filter() methods being non-recursive by default.** | ||||
- Added a :py:meth:`.matches` method to :py:class:`~.Wikicode` for | |||||
page/template name comparisons. | |||||
- The *obj* param of :py:meth:`Wikicode.insert_before() <.insert_before>`, | |||||
:py:meth:`~.insert_after`, :py:meth:`~.Wikicode.replace`, and | |||||
:py:meth:`~.Wikicode.remove` now accepts :py:class:`~.Wikicode` objects and | |||||
strings representing parts of wikitext, instead of just nodes. These methods | |||||
also make all possible substitutions instead of just one. | |||||
- Renamed :py:meth:`Template.has_param() <.has_param>` to | |||||
:py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's | |||||
other methods; :py:meth:`~.has_param` is now an alias. | |||||
- Added a :meth:`.matches` method to :class:`.Wikicode` for page/template name | |||||
comparisons. | |||||
- The *obj* param of :meth:`.Wikicode.insert_before`, :meth:`.insert_after`, | |||||
:meth:`~.Wikicode.replace`, and :meth:`~.Wikicode.remove` now accepts | |||||
:class:`.Wikicode` objects and strings representing parts of wikitext, | |||||
instead of just nodes. These methods also make all possible substitutions | |||||
instead of just one. | |||||
- Renamed :meth:`.Template.has_param` to :meth:`~.Template.has` for consistency | |||||
with :class:`.Template`\ 's other methods; :meth:`.has_param` is now an | |||||
alias. | |||||
- The C tokenizer extension now works on Python 3 in addition to Python 2.7. | - The C tokenizer extension now works on Python 3 in addition to Python 2.7. | ||||
- Various bugfixes, internal changes, and cleanup. | - Various bugfixes, internal changes, and cleanup. | ||||
@@ -83,29 +130,27 @@ v0.2 | |||||
- The parser now fully supports Python 3 in addition to Python 2.7. | - The parser now fully supports Python 3 in addition to Python 2.7. | ||||
- Added a C tokenizer extension that is significantly faster than its Python | - Added a C tokenizer extension that is significantly faster than its Python | ||||
equivalent. It is enabled by default (if available) and can be toggled by | equivalent. It is enabled by default (if available) and can be toggled by | ||||
setting :py:attr:`mwparserfromhell.parser.use_c` to a boolean value. | |||||
setting :attr:`mwparserfromhell.parser.use_c` to a boolean value. | |||||
- Added a complete set of unit tests covering parsing and wikicode | - Added a complete set of unit tests covering parsing and wikicode | ||||
manipulation. | manipulation. | ||||
- Renamed :py:meth:`.filter_links` to :py:meth:`.filter_wikilinks` (applies to | |||||
:py:meth:`.ifilter` as well). | |||||
- Added filter methods for :py:class:`Arguments <.Argument>`, | |||||
:py:class:`Comments <.Comment>`, :py:class:`Headings <.Heading>`, and | |||||
:py:class:`HTMLEntities <.HTMLEntity>`. | |||||
- Added *before* param to :py:meth:`Template.add() <.Template.add>`; renamed | |||||
*force_nonconformity* to *preserve_spacing*. | |||||
- Added *include_lead* param to :py:meth:`Wikicode.get_sections() | |||||
<.get_sections>`. | |||||
- Removed *flat* param from :py:meth:`.get_sections`. | |||||
- Removed *force_no_field* param from :py:meth:`Template.remove() | |||||
<.Template.remove>`. | |||||
- Renamed :meth:`.filter_links` to :meth:`.filter_wikilinks` (applies to | |||||
:meth:`.ifilter` as well). | |||||
- Added filter methods for :class:`Arguments <.Argument>`, | |||||
:class:`Comments <.Comment>`, :class:`Headings <.Heading>`, and | |||||
:class:`HTMLEntities <.HTMLEntity>`. | |||||
- Added *before* param to :meth:`.Template.add`; renamed *force_nonconformity* | |||||
to *preserve_spacing*. | |||||
- Added *include_lead* param to :meth:`.Wikicode.get_sections`. | |||||
- Removed *flat* param from :meth:`.get_sections`. | |||||
- Removed *force_no_field* param from :meth:`.Template.remove`. | |||||
- Added support for Travis CI. | - Added support for Travis CI. | ||||
- Added note about Windows build issue in the README. | - Added note about Windows build issue in the README. | ||||
- The tokenizer will limit itself to a realistic recursion depth to prevent | - The tokenizer will limit itself to a realistic recursion depth to prevent | ||||
errors and unreasonably long parse times. | errors and unreasonably long parse times. | ||||
- Fixed how some nodes' attribute setters handle input. | - Fixed how some nodes' attribute setters handle input. | ||||
- Fixed multiple bugs in the tokenizer's handling of invalid markup. | - Fixed multiple bugs in the tokenizer's handling of invalid markup. | ||||
- Fixed bugs in the implementation of :py:class:`.SmartList` and | |||||
:py:class:`.StringMixIn`. | |||||
- Fixed bugs in the implementation of :class:`.SmartList` and | |||||
:class:`.StringMixIn`. | |||||
- Fixed some broken example code in the README; other copyedits. | - Fixed some broken example code in the README; other copyedits. | ||||
- Other bugfixes and code cleanup. | - Other bugfixes and code cleanup. | ||||
@@ -115,12 +160,12 @@ v0.1.1 | |||||
`Released September 21, 2012 <https://github.com/earwig/mwparserfromhell/tree/v0.1.1>`_ | `Released September 21, 2012 <https://github.com/earwig/mwparserfromhell/tree/v0.1.1>`_ | ||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.1...v0.1.1>`__): | (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.1...v0.1.1>`__): | ||||
- Added support for :py:class:`Comments <.Comment>` (``<!-- foo -->``) and | |||||
:py:class:`Wikilinks <.Wikilink>` (``[[foo]]``). | |||||
- Added corresponding :py:meth:`.ifilter_links` and :py:meth:`.filter_links` | |||||
methods to :py:class:`.Wikicode`. | |||||
- Added support for :class:`Comments <.Comment>` (``<!-- foo -->``) and | |||||
:class:`Wikilinks <.Wikilink>` (``[[foo]]``). | |||||
- Added corresponding :meth:`.ifilter_links` and :meth:`.filter_links` methods | |||||
to :class:`.Wikicode`. | |||||
- Fixed a bug when parsing incomplete templates. | - Fixed a bug when parsing incomplete templates. | ||||
- Fixed :py:meth:`.strip_code` to affect the contents of headings. | |||||
- Fixed :meth:`.strip_code` to affect the contents of headings. | |||||
- Various copyedits in documentation and comments. | - Various copyedits in documentation and comments. | ||||
v0.1 | v0.1 | ||||
@@ -42,7 +42,7 @@ master_doc = 'index' | |||||
# General information about the project. | # General information about the project. | ||||
project = u'mwparserfromhell' | project = u'mwparserfromhell' | ||||
copyright = u'2012, 2013, 2014 Ben Kurtovic' | |||||
copyright = u'2012, 2013, 2014, 2015 Ben Kurtovic' | |||||
# The version info for the project you're documenting, acts as replacement for | # The version info for the project you're documenting, acts as replacement for | ||||
# |version| and |release|, also used in various other places throughout the | # |version| and |release|, also used in various other places throughout the | ||||
@@ -1,9 +1,9 @@ | |||||
MWParserFromHell v\ |version| Documentation | MWParserFromHell v\ |version| Documentation | ||||
=========================================== | =========================================== | ||||
:py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python | |||||
package that provides an easy-to-use and outrageously powerful parser for | |||||
MediaWiki_ wikicode. It supports Python 2 and Python 3. | |||||
:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package | |||||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | |||||
wikicode. It supports Python 2 and Python 3. | |||||
Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. | Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. | ||||
Development occurs on GitHub_. | Development occurs on GitHub_. | ||||
@@ -1,35 +1,40 @@ | |||||
Integration | Integration | ||||
=========== | =========== | ||||
:py:mod:`mwparserfromhell` is used by and originally developed for EarwigBot_; | |||||
:py:class:`~earwigbot.wiki.page.Page` objects have a | |||||
:py:meth:`~earwigbot.wiki.page.Page.parse` method that essentially calls | |||||
:py:func:`mwparserfromhell.parse() <mwparserfromhell.__init__.parse>` on | |||||
:py:meth:`~earwigbot.wiki.page.Page.get`. | |||||
:mod:`mwparserfromhell` is used by and originally developed for EarwigBot_; | |||||
:class:`~earwigbot.wiki.page.Page` objects have a | |||||
:meth:`~earwigbot.wiki.page.Page.parse` method that essentially calls | |||||
:func:`mwparserfromhell.parse() <mwparserfromhell.__init__.parse>` on | |||||
:meth:`~earwigbot.wiki.page.Page.get`. | |||||
If you're using Pywikipedia_, your code might look like this:: | |||||
If you're using Pywikibot_, your code might look like this:: | |||||
import mwparserfromhell | import mwparserfromhell | ||||
import wikipedia as pywikibot | |||||
import pywikibot | |||||
def parse(title): | def parse(title): | ||||
site = pywikibot.getSite() | |||||
site = pywikibot.Site() | |||||
page = pywikibot.Page(site, title) | page = pywikibot.Page(site, title) | ||||
text = page.get() | text = page.get() | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
If you're not using a library, you can parse templates in any page using the | |||||
following code (via the API_):: | |||||
If you're not using a library, you can parse any page using the following code | |||||
(via the API_):: | |||||
import json | import json | ||||
import urllib | |||||
from urllib.parse import urlencode | |||||
from urllib.request import urlopen | |||||
import mwparserfromhell | import mwparserfromhell | ||||
API_URL = "http://en.wikipedia.org/w/api.php" | API_URL = "http://en.wikipedia.org/w/api.php" | ||||
def parse(title): | def parse(title): | ||||
raw = urllib.urlopen(API_URL, data).read() | |||||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||||
"rvprop": "content", "format": "json", "titles": title} | |||||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||||
res = json.loads(raw) | res = json.loads(raw) | ||||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | ||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
.. _EarwigBot: https://github.com/earwig/earwigbot | .. _EarwigBot: https://github.com/earwig/earwigbot | ||||
.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot | |||||
.. _Pywikibot: https://www.mediawiki.org/wiki/Manual:Pywikibot | |||||
.. _API: http://mediawiki.org/wiki/API | .. _API: http://mediawiki.org/wiki/API |
@@ -6,25 +6,25 @@ Normal usage is rather straightforward (where ``text`` is page text):: | |||||
>>> import mwparserfromhell | >>> import mwparserfromhell | ||||
>>> wikicode = mwparserfromhell.parse(text) | >>> wikicode = mwparserfromhell.parse(text) | ||||
``wikicode`` is a :py:class:`mwparserfromhell.Wikicode <.Wikicode>` object, | |||||
which acts like an ordinary ``unicode`` object (or ``str`` in Python 3) with | |||||
some extra methods. For example:: | |||||
``wikicode`` is a :class:`mwparserfromhell.Wikicode <.Wikicode>` object, which | |||||
acts like an ordinary ``str`` object (or ``unicode`` in Python 2) with some | |||||
extra methods. For example:: | |||||
>>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | ||||
>>> wikicode = mwparserfromhell.parse(text) | >>> wikicode = mwparserfromhell.parse(text) | ||||
>>> print wikicode | |||||
>>> print(wikicode) | |||||
I has a template! {{foo|bar|baz|eggs=spam}} See it? | I has a template! {{foo|bar|baz|eggs=spam}} See it? | ||||
>>> templates = wikicode.filter_templates() | >>> templates = wikicode.filter_templates() | ||||
>>> print templates | |||||
>>> print(templates) | |||||
['{{foo|bar|baz|eggs=spam}}'] | ['{{foo|bar|baz|eggs=spam}}'] | ||||
>>> template = templates[0] | >>> template = templates[0] | ||||
>>> print template.name | |||||
>>> print(template.name) | |||||
foo | foo | ||||
>>> print template.params | |||||
>>> print(template.params) | |||||
['bar', 'baz', 'eggs=spam'] | ['bar', 'baz', 'eggs=spam'] | ||||
>>> print template.get(1).value | |||||
>>> print(template.get(1).value) | |||||
bar | bar | ||||
>>> print template.get("eggs").value | |||||
>>> print(template.get("eggs").value) | |||||
spam | spam | ||||
Since nodes can contain other nodes, getting nested templates is trivial:: | Since nodes can contain other nodes, getting nested templates is trivial:: | ||||
@@ -33,27 +33,27 @@ Since nodes can contain other nodes, getting nested templates is trivial:: | |||||
>>> mwparserfromhell.parse(text).filter_templates() | >>> mwparserfromhell.parse(text).filter_templates() | ||||
['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] | ||||
You can also pass *recursive=False* to :py:meth:`~.filter_templates` and | |||||
explore templates manually. This is possible because nodes can contain | |||||
additional :py:class:`~.Wikicode` objects:: | |||||
You can also pass *recursive=False* to :meth:`.filter_templates` and explore | |||||
templates manually. This is possible because nodes can contain additional | |||||
:class:`.Wikicode` objects:: | |||||
>>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | ||||
>>> print code.filter_templates(recursive=False) | |||||
>>> print(code.filter_templates(recursive=False)) | |||||
['{{foo|this {{includes a|template}}}}'] | ['{{foo|this {{includes a|template}}}}'] | ||||
>>> foo = code.filter_templates(recursive=False)[0] | >>> foo = code.filter_templates(recursive=False)[0] | ||||
>>> print foo.get(1).value | |||||
>>> print(foo.get(1).value) | |||||
this {{includes a|template}} | this {{includes a|template}} | ||||
>>> print foo.get(1).value.filter_templates()[0] | |||||
>>> print(foo.get(1).value.filter_templates()[0]) | |||||
{{includes a|template}} | {{includes a|template}} | ||||
>>> print foo.get(1).value.filter_templates()[0].get(1).value | |||||
>>> print(foo.get(1).value.filter_templates()[0].get(1).value) | |||||
template | template | ||||
Templates can be easily modified to add, remove, or alter params. | Templates can be easily modified to add, remove, or alter params. | ||||
:py:class:`~.Wikicode` objects can be treated like lists, with | |||||
:py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`, | |||||
:py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more. They | |||||
also have a :py:meth:`~.Wikicode.matches` method for comparing page or template | |||||
names, which takes care of capitalization and whitespace:: | |||||
:class:`.Wikicode` objects can be treated like lists, with | |||||
:meth:`~.Wikicode.append`, :meth:`~.Wikicode.insert`, | |||||
:meth:`~.Wikicode.remove`, :meth:`~.Wikicode.replace`, and more. They also have | |||||
a :meth:`~.Wikicode.matches` method for comparing page or template names, which | |||||
takes care of capitalization and whitespace:: | |||||
>>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | ||||
>>> code = mwparserfromhell.parse(text) | >>> code = mwparserfromhell.parse(text) | ||||
@@ -61,24 +61,24 @@ names, which takes care of capitalization and whitespace:: | |||||
... if template.name.matches("Cleanup") and not template.has("date"): | ... if template.name.matches("Cleanup") and not template.has("date"): | ||||
... template.add("date", "July 2012") | ... template.add("date", "July 2012") | ||||
... | ... | ||||
>>> print code | |||||
>>> print(code) | |||||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} | ||||
>>> code.replace("{{uncategorized}}", "{{bar-stub}}") | >>> code.replace("{{uncategorized}}", "{{bar-stub}}") | ||||
>>> print code | |||||
>>> print(code) | |||||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | ||||
>>> print code.filter_templates() | |||||
>>> print(code.filter_templates()) | |||||
['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] | ||||
You can then convert ``code`` back into a regular :py:class:`unicode` object | |||||
(for saving the page!) by calling :py:func:`unicode` on it:: | |||||
You can then convert ``code`` back into a regular :class:`str` object (for | |||||
saving the page!) by calling :func:`str` on it:: | |||||
>>> text = unicode(code) | |||||
>>> print text | |||||
>>> text = str(code) | |||||
>>> print(text) | |||||
{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} | ||||
>>> text == code | >>> text == code | ||||
True | True | ||||
(Likewise, use :py:func:`str(code) <str>` in Python 3.) | |||||
(Likewise, use :func:`unicode(code) <unicode>` in Python 2.) | |||||
For more tips, check out :py:class:`Wikicode's full method list <.Wikicode>` | |||||
and the :py:mod:`list of Nodes <.nodes>`. | |||||
For more tips, check out :class:`Wikicode's full method list <.Wikicode>` and | |||||
the :mod:`list of Nodes <.nodes>`. |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -26,12 +26,10 @@ Parser from Hell) is a Python package that provides an easy-to-use and | |||||
outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | ||||
""" | """ | ||||
from __future__ import unicode_literals | |||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" | |||||
__copyright__ = "Copyright (C) 2012, 2013, 2014, 2015 Ben Kurtovic" | |||||
__license__ = "MIT License" | __license__ = "MIT License" | ||||
__version__ = "0.3.3" | |||||
__version__ = "0.4" | |||||
__email__ = "ben.kurtovic@gmail.com" | __email__ = "ben.kurtovic@gmail.com" | ||||
from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | ||||
@@ -2,10 +2,10 @@ | |||||
""" | """ | ||||
Implements support for both Python 2 and Python 3 by defining common types in | Implements support for both Python 2 and Python 3 by defining common types in | ||||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||||
types are meant to be imported directly from within the parser's modules. | |||||
terms of their Python 2/3 variants. For example, :class:`str` is set to | |||||
:class:`unicode` on Python 2 but :class:`str` on Python 3; likewise, | |||||
:class:`bytes` is :class:`str` on 2 but :class:`bytes` on 3. These types are | |||||
meant to be imported directly from within the parser's modules. | |||||
""" | """ | ||||
import sys | import sys | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -52,7 +52,7 @@ INVISIBLE_TAGS = [ | |||||
# [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | # [mediawiki/core.git]/includes/Sanitizer.php @ 87a0aef762 | ||||
SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | SINGLE_ONLY = ["br", "hr", "meta", "link", "img"] | ||||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd"] | |||||
SINGLE = SINGLE_ONLY + ["li", "dt", "dd", "th", "td", "tr"] | |||||
MARKUP_TO_HTML = { | MARKUP_TO_HTML = { | ||||
"#": "li", | "#": "li", | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,12 +21,12 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | """ | ||||
This package contains :py:class:`~.Wikicode` "nodes", which represent a single | |||||
unit of wikitext, such as a Template, an HTML tag, a Heading, or plain text. | |||||
The node "tree" is far from flat, as most types can contain additional | |||||
:py:class:`~.Wikicode` types within them - and with that, more nodes. For | |||||
example, the name of a :py:class:`~.Template` is a :py:class:`~.Wikicode` | |||||
object that can contain text or more templates. | |||||
This package contains :class:`.Wikicode` "nodes", which represent a single unit | |||||
of wikitext, such as a Template, an HTML tag, a Heading, or plain text. The | |||||
node "tree" is far from flat, as most types can contain additional | |||||
:class:`.Wikicode` types within them - and with that, more nodes. For example, | |||||
the name of a :class:`.Template` is a :class:`.Wikicode` object that can | |||||
contain text or more templates. | |||||
""" | """ | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
@@ -40,23 +40,23 @@ __all__ = ["Node", "Text", "Argument", "Heading", "HTMLEntity", "Tag", | |||||
class Node(StringMixIn): | class Node(StringMixIn): | ||||
"""Represents the base Node type, demonstrating the methods to override. | """Represents the base Node type, demonstrating the methods to override. | ||||
:py:meth:`__unicode__` must be overridden. It should return a ``unicode`` | |||||
or (``str`` in py3k) representation of the node. If the node contains | |||||
:py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__` | |||||
should be a generator that iterates over them. If the node is printable | |||||
(shown when the page is rendered), :py:meth:`__strip__` should return its | |||||
:meth:`__unicode__` must be overridden. It should return a ``unicode`` or | |||||
(``str`` in py3k) representation of the node. If the node contains | |||||
:class:`.Wikicode` objects inside of it, :meth:`__children__` should be a | |||||
generator that iterates over them. If the node is printable | |||||
(shown when the page is rendered), :meth:`__strip__` should return its | |||||
printable version, stripping out any formatting marks. It does not have to | printable version, stripping out any formatting marks. It does not have to | ||||
return a string, but something that can be converted to a string with | return a string, but something that can be converted to a string with | ||||
``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a | |||||
``str()``. Finally, :meth:`__showtree__` can be overridden to build a | |||||
nice tree representation of the node, if desired, for | nice tree representation of the node, if desired, for | ||||
:py:meth:`~.Wikicode.get_tree`. | |||||
:meth:`~.Wikicode.get_tree`. | |||||
""" | """ | ||||
def __unicode__(self): | def __unicode__(self): | ||||
raise NotImplementedError() | raise NotImplementedError() | ||||
def __children__(self): | def __children__(self): | ||||
return # Funny generator-that-yields-nothing syntax | |||||
yield | |||||
return | |||||
yield # pragma: no cover (this is a generator that yields nothing) | |||||
def __strip__(self, normalize, collapse): | def __strip__(self, normalize, collapse): | ||||
return None | return None | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -67,12 +67,12 @@ class ExternalLink(Node): | |||||
@property | @property | ||||
def url(self): | def url(self): | ||||
"""The URL of the link target, as a :py:class:`~.Wikicode` object.""" | |||||
"""The URL of the link target, as a :class:`.Wikicode` object.""" | |||||
return self._url | return self._url | ||||
@property | @property | ||||
def title(self): | def title(self): | ||||
"""The link title (if given), as a :py:class:`~.Wikicode` object.""" | |||||
"""The link title (if given), as a :class:`.Wikicode` object.""" | |||||
return self._title | return self._title | ||||
@property | @property | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,9 +21,8 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | """ | ||||
This package contains objects used by | |||||
:py:class:`~.Node`\ s, but are not nodes themselves. This includes the | |||||
parameters of Templates or the attributes of HTML tags. | |||||
This package contains objects used by :class:`.Node`\ s, but that are not nodes | |||||
themselves. This includes template parameters and HTML tag attributes. | |||||
""" | """ | ||||
from .attribute import Attribute | from .attribute import Attribute | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -31,17 +31,19 @@ __all__ = ["Attribute"] | |||||
class Attribute(StringMixIn): | class Attribute(StringMixIn): | ||||
"""Represents an attribute of an HTML tag. | """Represents an attribute of an HTML tag. | ||||
This is used by :py:class:`~.Tag` objects. For example, the tag | |||||
This is used by :class:`.Tag` objects. For example, the tag | |||||
``<ref name="foo">`` contains an Attribute whose name is ``"name"`` and | ``<ref name="foo">`` contains an Attribute whose name is ``"name"`` and | ||||
whose value is ``"foo"``. | whose value is ``"foo"``. | ||||
""" | """ | ||||
def __init__(self, name, value=None, quoted=True, pad_first=" ", | |||||
pad_before_eq="", pad_after_eq=""): | |||||
def __init__(self, name, value=None, quotes='"', pad_first=" ", | |||||
pad_before_eq="", pad_after_eq="", check_quotes=True): | |||||
super(Attribute, self).__init__() | super(Attribute, self).__init__() | ||||
if check_quotes and not quotes and self._value_needs_quotes(value): | |||||
raise ValueError("given value {0!r} requires quotes".format(value)) | |||||
self._name = name | self._name = name | ||||
self._value = value | self._value = value | ||||
self._quoted = quoted | |||||
self._quotes = quotes | |||||
self._pad_first = pad_first | self._pad_first = pad_first | ||||
self._pad_before_eq = pad_before_eq | self._pad_before_eq = pad_before_eq | ||||
self._pad_after_eq = pad_after_eq | self._pad_after_eq = pad_after_eq | ||||
@@ -50,11 +52,18 @@ class Attribute(StringMixIn): | |||||
result = self.pad_first + str(self.name) + self.pad_before_eq | result = self.pad_first + str(self.name) + self.pad_before_eq | ||||
if self.value is not None: | if self.value is not None: | ||||
result += "=" + self.pad_after_eq | result += "=" + self.pad_after_eq | ||||
if self.quoted: | |||||
return result + '"' + str(self.value) + '"' | |||||
if self.quotes: | |||||
return result + self.quotes + str(self.value) + self.quotes | |||||
return result + str(self.value) | return result + str(self.value) | ||||
return result | return result | ||||
@staticmethod | |||||
def _value_needs_quotes(val): | |||||
"""Return the preferred quotes for the given value, or None.""" | |||||
if val and any(char.isspace() for char in val): | |||||
return ('"' in val and "'" in val) or ("'" if '"' in val else '"') | |||||
return None | |||||
def _set_padding(self, attr, value): | def _set_padding(self, attr, value): | ||||
"""Setter for the value of a padding attribute.""" | """Setter for the value of a padding attribute.""" | ||||
if not value: | if not value: | ||||
@@ -65,20 +74,28 @@ class Attribute(StringMixIn): | |||||
raise ValueError("padding must be entirely whitespace") | raise ValueError("padding must be entirely whitespace") | ||||
setattr(self, attr, value) | setattr(self, attr, value) | ||||
@staticmethod | |||||
def coerce_quotes(quotes): | |||||
"""Coerce a quote type into an acceptable value, or raise an error.""" | |||||
orig, quotes = quotes, str(quotes) if quotes else None | |||||
if quotes not in [None, '"', "'"]: | |||||
raise ValueError("{0!r} is not a valid quote type".format(orig)) | |||||
return quotes | |||||
@property | @property | ||||
def name(self): | def name(self): | ||||
"""The name of the attribute as a :py:class:`~.Wikicode` object.""" | |||||
"""The name of the attribute as a :class:`.Wikicode` object.""" | |||||
return self._name | return self._name | ||||
@property | @property | ||||
def value(self): | def value(self): | ||||
"""The value of the attribute as a :py:class:`~.Wikicode` object.""" | |||||
"""The value of the attribute as a :class:`.Wikicode` object.""" | |||||
return self._value | return self._value | ||||
@property | @property | ||||
def quoted(self): | |||||
"""Whether the attribute's value is quoted with double quotes.""" | |||||
return self._quoted | |||||
def quotes(self): | |||||
"""How to enclose the attribute value. ``"``, ``'``, or ``None``.""" | |||||
return self._quotes | |||||
@property | @property | ||||
def pad_first(self): | def pad_first(self): | ||||
@@ -101,11 +118,21 @@ class Attribute(StringMixIn): | |||||
@value.setter | @value.setter | ||||
def value(self, newval): | def value(self, newval): | ||||
self._value = None if newval is None else parse_anything(newval) | |||||
@quoted.setter | |||||
def quoted(self, value): | |||||
self._quoted = bool(value) | |||||
if newval is None: | |||||
self._value = None | |||||
else: | |||||
code = parse_anything(newval) | |||||
quotes = self._value_needs_quotes(code) | |||||
if quotes in ['"', "'"] or (quotes is True and not self.quotes): | |||||
self._quotes = quotes | |||||
self._value = code | |||||
@quotes.setter | |||||
def quotes(self, value): | |||||
value = self.coerce_quotes(value) | |||||
if not value and self._value_needs_quotes(self.value): | |||||
raise ValueError("attribute value requires quotes") | |||||
self._quotes = value | |||||
@pad_first.setter | @pad_first.setter | ||||
def pad_first(self, value): | def pad_first(self, value): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,6 +21,7 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import re | |||||
from ...compat import str | from ...compat import str | ||||
from ...string_mixin import StringMixIn | from ...string_mixin import StringMixIn | ||||
@@ -39,6 +40,8 @@ class Parameter(StringMixIn): | |||||
def __init__(self, name, value, showkey=True): | def __init__(self, name, value, showkey=True): | ||||
super(Parameter, self).__init__() | super(Parameter, self).__init__() | ||||
if not showkey and not self.can_hide_key(name): | |||||
raise ValueError("key {0!r} cannot be hidden".format(name)) | |||||
self._name = name | self._name = name | ||||
self._value = value | self._value = value | ||||
self._showkey = showkey | self._showkey = showkey | ||||
@@ -48,14 +51,19 @@ class Parameter(StringMixIn): | |||||
return str(self.name) + "=" + str(self.value) | return str(self.name) + "=" + str(self.value) | ||||
return str(self.value) | return str(self.value) | ||||
@staticmethod | |||||
def can_hide_key(key): | |||||
"""Return whether or not the given key can be hidden.""" | |||||
return re.match(r"[1-9][0-9]*$", str(key).strip()) | |||||
@property | @property | ||||
def name(self): | def name(self): | ||||
"""The name of the parameter as a :py:class:`~.Wikicode` object.""" | |||||
"""The name of the parameter as a :class:`.Wikicode` object.""" | |||||
return self._name | return self._name | ||||
@property | @property | ||||
def value(self): | def value(self): | ||||
"""The value of the parameter as a :py:class:`~.Wikicode` object.""" | |||||
"""The value of the parameter as a :class:`.Wikicode` object.""" | |||||
return self._value | return self._value | ||||
@property | @property | ||||
@@ -73,4 +81,7 @@ class Parameter(StringMixIn): | |||||
@showkey.setter | @showkey.setter | ||||
def showkey(self, newval): | def showkey(self, newval): | ||||
self._showkey = bool(newval) | |||||
newval = bool(newval) | |||||
if not newval and not self.can_hide_key(self.name): | |||||
raise ValueError("parameter key cannot be hidden") | |||||
self._showkey = newval |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -52,7 +52,7 @@ class Heading(Node): | |||||
@property | @property | ||||
def title(self): | def title(self): | ||||
"""The title of the heading, as a :py:class:`~.Wikicode` object.""" | |||||
"""The title of the heading, as a :class:`.Wikicode` object.""" | |||||
return self._title | return self._title | ||||
@property | @property | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -77,17 +77,17 @@ class HTMLEntity(Node): | |||||
# Test whether we're on the wide or narrow Python build. Check | # Test whether we're on the wide or narrow Python build. Check | ||||
# the length of a non-BMP code point | # the length of a non-BMP code point | ||||
# (U+1F64A, SPEAK-NO-EVIL MONKEY): | # (U+1F64A, SPEAK-NO-EVIL MONKEY): | ||||
if len("\U0001F64A") == 2: | |||||
# Ensure this is within the range we can encode: | |||||
if value > 0x10FFFF: | |||||
raise ValueError("unichr() arg not in range(0x110000)") | |||||
code = value - 0x10000 | |||||
if value < 0: # Invalid code point | |||||
raise | |||||
lead = 0xD800 + (code >> 10) | |||||
trail = 0xDC00 + (code % (1 << 10)) | |||||
return unichr(lead) + unichr(trail) | |||||
raise | |||||
if len("\U0001F64A") == 1: # pragma: no cover | |||||
raise | |||||
# Ensure this is within the range we can encode: | |||||
if value > 0x10FFFF: | |||||
raise ValueError("unichr() arg not in range(0x110000)") | |||||
code = value - 0x10000 | |||||
if value < 0: # Invalid code point | |||||
raise | |||||
lead = 0xD800 + (code >> 10) | |||||
trail = 0xDC00 + (code % (1 << 10)) | |||||
return unichr(lead) + unichr(trail) | |||||
@property | @property | ||||
def value(self): | def value(self): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -35,7 +35,8 @@ class Tag(Node): | |||||
def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, | def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, | ||||
self_closing=False, invalid=False, implicit=False, padding="", | self_closing=False, invalid=False, implicit=False, padding="", | ||||
closing_tag=None): | |||||
closing_tag=None, wiki_style_separator=None, | |||||
closing_wiki_markup=None): | |||||
super(Tag, self).__init__() | super(Tag, self).__init__() | ||||
self._tag = tag | self._tag = tag | ||||
if contents is None and not self_closing: | if contents is None and not self_closing: | ||||
@@ -52,13 +53,28 @@ class Tag(Node): | |||||
self._closing_tag = closing_tag | self._closing_tag = closing_tag | ||||
else: | else: | ||||
self._closing_tag = tag | self._closing_tag = tag | ||||
self._wiki_style_separator = wiki_style_separator | |||||
if closing_wiki_markup is not None: | |||||
self._closing_wiki_markup = closing_wiki_markup | |||||
elif wiki_markup and not self_closing: | |||||
self._closing_wiki_markup = wiki_markup | |||||
else: | |||||
self._closing_wiki_markup = None | |||||
def __unicode__(self): | def __unicode__(self): | ||||
if self.wiki_markup: | if self.wiki_markup: | ||||
if self.attributes: | |||||
attrs = "".join([str(attr) for attr in self.attributes]) | |||||
else: | |||||
attrs = "" | |||||
padding = self.padding or "" | |||||
separator = self.wiki_style_separator or "" | |||||
close = self.closing_wiki_markup or "" | |||||
if self.self_closing: | if self.self_closing: | ||||
return self.wiki_markup | |||||
return self.wiki_markup + attrs + padding + separator | |||||
else: | else: | ||||
return self.wiki_markup + str(self.contents) + self.wiki_markup | |||||
return self.wiki_markup + attrs + padding + separator + \ | |||||
str(self.contents) + close | |||||
result = ("</" if self.invalid else "<") + str(self.tag) | result = ("</" if self.invalid else "<") + str(self.tag) | ||||
if self.attributes: | if self.attributes: | ||||
@@ -73,10 +89,10 @@ class Tag(Node): | |||||
def __children__(self): | def __children__(self): | ||||
if not self.wiki_markup: | if not self.wiki_markup: | ||||
yield self.tag | yield self.tag | ||||
for attr in self.attributes: | |||||
yield attr.name | |||||
if attr.value is not None: | |||||
yield attr.value | |||||
for attr in self.attributes: | |||||
yield attr.name | |||||
if attr.value is not None: | |||||
yield attr.value | |||||
if self.contents: | if self.contents: | ||||
yield self.contents | yield self.contents | ||||
if not self.self_closing and not self.wiki_markup and self.closing_tag: | if not self.self_closing and not self.wiki_markup and self.closing_tag: | ||||
@@ -108,19 +124,19 @@ class Tag(Node): | |||||
@property | @property | ||||
def tag(self): | def tag(self): | ||||
"""The tag itself, as a :py:class:`~.Wikicode` object.""" | |||||
"""The tag itself, as a :class:`.Wikicode` object.""" | |||||
return self._tag | return self._tag | ||||
@property | @property | ||||
def contents(self): | def contents(self): | ||||
"""The contents of the tag, as a :py:class:`~.Wikicode` object.""" | |||||
"""The contents of the tag, as a :class:`.Wikicode` object.""" | |||||
return self._contents | return self._contents | ||||
@property | @property | ||||
def attributes(self): | def attributes(self): | ||||
"""The list of attributes affecting the tag. | """The list of attributes affecting the tag. | ||||
Each attribute is an instance of :py:class:`~.Attribute`. | |||||
Each attribute is an instance of :class:`.Attribute`. | |||||
""" | """ | ||||
return self._attrs | return self._attrs | ||||
@@ -146,7 +162,7 @@ class Tag(Node): | |||||
This makes the tag look like a lone close tag. It is technically | This makes the tag look like a lone close tag. It is technically | ||||
invalid and is only parsable Wikicode when the tag itself is | invalid and is only parsable Wikicode when the tag itself is | ||||
single-only, like ``<br>`` and ``<img>``. See | single-only, like ``<br>`` and ``<img>``. See | ||||
:py:func:`.definitions.is_single_only`. | |||||
:func:`.definitions.is_single_only`. | |||||
""" | """ | ||||
return self._invalid | return self._invalid | ||||
@@ -155,8 +171,8 @@ class Tag(Node): | |||||
"""Whether the tag is implicitly self-closing, with no ending slash. | """Whether the tag is implicitly self-closing, with no ending slash. | ||||
This is only possible for specific "single" tags like ``<br>`` and | This is only possible for specific "single" tags like ``<br>`` and | ||||
``<li>``. See :py:func:`.definitions.is_single`. This field only has an | |||||
effect if :py:attr:`self_closing` is also ``True``. | |||||
``<li>``. See :func:`.definitions.is_single`. This field only has an | |||||
effect if :attr:`self_closing` is also ``True``. | |||||
""" | """ | ||||
return self._implicit | return self._implicit | ||||
@@ -167,13 +183,34 @@ class Tag(Node): | |||||
@property | @property | ||||
def closing_tag(self): | def closing_tag(self): | ||||
"""The closing tag, as a :py:class:`~.Wikicode` object. | |||||
"""The closing tag, as a :class:`.Wikicode` object. | |||||
This will usually equal :py:attr:`tag`, unless there is additional | |||||
This will usually equal :attr:`tag`, unless there is additional | |||||
spacing, comments, or the like. | spacing, comments, or the like. | ||||
""" | """ | ||||
return self._closing_tag | return self._closing_tag | ||||
@property | |||||
def wiki_style_separator(self): | |||||
"""The separator between the padding and content in a wiki markup tag. | |||||
Essentially the wiki equivalent of the TagCloseOpen. | |||||
""" | |||||
return self._wiki_style_separator | |||||
@property | |||||
def closing_wiki_markup(self): | |||||
"""The wikified version of the closing tag to show instead of HTML. | |||||
If set to a value, this will be displayed instead of the close tag | |||||
brackets. If tag is :attr:`self_closing` is ``True`` then this is not | |||||
displayed. If :attr:`wiki_markup` is set and this has not been set, this | |||||
is set to the value of :attr:`wiki_markup`. If this has been set and | |||||
:attr:`wiki_markup` is set to a ``False`` value, this is set to | |||||
``None``. | |||||
""" | |||||
return self._closing_wiki_markup | |||||
@tag.setter | @tag.setter | ||||
def tag(self, value): | def tag(self, value): | ||||
self._tag = self._closing_tag = parse_anything(value) | self._tag = self._closing_tag = parse_anything(value) | ||||
@@ -185,6 +222,8 @@ class Tag(Node): | |||||
@wiki_markup.setter | @wiki_markup.setter | ||||
def wiki_markup(self, value): | def wiki_markup(self, value): | ||||
self._wiki_markup = str(value) if value else None | self._wiki_markup = str(value) if value else None | ||||
if not value or not self.closing_wiki_markup: | |||||
self._closing_wiki_markup = self._wiki_markup | |||||
@self_closing.setter | @self_closing.setter | ||||
def self_closing(self, value): | def self_closing(self, value): | ||||
@@ -212,6 +251,14 @@ class Tag(Node): | |||||
def closing_tag(self, value): | def closing_tag(self, value): | ||||
self._closing_tag = parse_anything(value) | self._closing_tag = parse_anything(value) | ||||
@wiki_style_separator.setter | |||||
def wiki_style_separator(self, value): | |||||
self._wiki_style_separator = str(value) if value else None | |||||
@closing_wiki_markup.setter | |||||
def closing_wiki_markup(self, value): | |||||
self._closing_wiki_markup = str(value) if value else None | |||||
def has(self, name): | def has(self, name): | ||||
"""Return whether any attribute in the tag has the given *name*. | """Return whether any attribute in the tag has the given *name*. | ||||
@@ -226,8 +273,8 @@ class Tag(Node): | |||||
def get(self, name): | def get(self, name): | ||||
"""Get the attribute with the given *name*. | """Get the attribute with the given *name*. | ||||
The returned object is a :py:class:`~.Attribute` instance. Raises | |||||
:py:exc:`ValueError` if no attribute has this name. Since multiple | |||||
The returned object is a :class:`.Attribute` instance. Raises | |||||
:exc:`ValueError` if no attribute has this name. Since multiple | |||||
attributes can have the same name, we'll return the last match, since | attributes can have the same name, we'll return the last match, since | ||||
all but the last are ignored by the MediaWiki parser. | all but the last are ignored by the MediaWiki parser. | ||||
""" | """ | ||||
@@ -236,21 +283,24 @@ class Tag(Node): | |||||
return attr | return attr | ||||
raise ValueError(name) | raise ValueError(name) | ||||
def add(self, name, value=None, quoted=True, pad_first=" ", | |||||
def add(self, name, value=None, quotes='"', pad_first=" ", | |||||
pad_before_eq="", pad_after_eq=""): | pad_before_eq="", pad_after_eq=""): | ||||
"""Add an attribute with the given *name* and *value*. | """Add an attribute with the given *name* and *value*. | ||||
*name* and *value* can be anything parasable by | |||||
:py:func:`.utils.parse_anything`; *value* can be omitted if the | |||||
attribute is valueless. *quoted* is a bool telling whether to wrap the | |||||
*value* in double quotes (this is recommended). *pad_first*, | |||||
*pad_before_eq*, and *pad_after_eq* are whitespace used as padding | |||||
before the name, before the equal sign (or after the name if no value), | |||||
and after the equal sign (ignored if no value), respectively. | |||||
*name* and *value* can be anything parsable by | |||||
:func:`.utils.parse_anything`; *value* can be omitted if the attribute | |||||
is valueless. If *quotes* is not ``None``, it should be a string | |||||
(either ``"`` or ``'``) that *value* will be wrapped in (this is | |||||
recommended). ``None`` is only legal if *value* contains no spacing. | |||||
*pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as | |||||
padding before the name, before the equal sign (or after the name if no | |||||
value), and after the equal sign (ignored if no value), respectively. | |||||
""" | """ | ||||
if value is not None: | if value is not None: | ||||
value = parse_anything(value) | value = parse_anything(value) | ||||
attr = Attribute(parse_anything(name), value, quoted) | |||||
quotes = Attribute.coerce_quotes(quotes) | |||||
attr = Attribute(parse_anything(name), value, quotes) | |||||
attr.pad_first = pad_first | attr.pad_first = pad_first | ||||
attr.pad_before_eq = pad_before_eq | attr.pad_before_eq = pad_before_eq | ||||
attr.pad_after_eq = pad_after_eq | attr.pad_after_eq = pad_after_eq | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -95,7 +95,7 @@ class Template(Node): | |||||
def _select_theory(self, theories): | def _select_theory(self, theories): | ||||
"""Return the most likely spacing convention given different options. | """Return the most likely spacing convention given different options. | ||||
Given a dictionary of convention options as keys and their occurance as | |||||
Given a dictionary of convention options as keys and their occurrence as | |||||
values, return the convention that occurs the most, or ``None`` if | values, return the convention that occurs the most, or ``None`` if | ||||
there is no clear preferred style. | there is no clear preferred style. | ||||
""" | """ | ||||
@@ -110,8 +110,8 @@ class Template(Node): | |||||
"""Try to determine the whitespace conventions for parameters. | """Try to determine the whitespace conventions for parameters. | ||||
This will examine the existing parameters and use | This will examine the existing parameters and use | ||||
:py:meth:`_select_theory` to determine if there are any preferred | |||||
styles for how much whitespace to put before or after the value. | |||||
:meth:`_select_theory` to determine if there are any preferred styles | |||||
for how much whitespace to put before or after the value. | |||||
""" | """ | ||||
before_theories = defaultdict(lambda: 0) | before_theories = defaultdict(lambda: 0) | ||||
after_theories = defaultdict(lambda: 0) | after_theories = defaultdict(lambda: 0) | ||||
@@ -155,10 +155,11 @@ class Template(Node): | |||||
else: | else: | ||||
self.params.pop(i) | self.params.pop(i) | ||||
return | return | ||||
raise ValueError(needle) | |||||
@property | @property | ||||
def name(self): | def name(self): | ||||
"""The name of the template, as a :py:class:`~.Wikicode` object.""" | |||||
"""The name of the template, as a :class:`.Wikicode` object.""" | |||||
return self._name | return self._name | ||||
@property | @property | ||||
@@ -188,13 +189,13 @@ class Template(Node): | |||||
has_param = lambda self, name, ignore_empty=False: \ | has_param = lambda self, name, ignore_empty=False: \ | ||||
self.has(name, ignore_empty) | self.has(name, ignore_empty) | ||||
has_param.__doc__ = "Alias for :py:meth:`has`." | |||||
has_param.__doc__ = "Alias for :meth:`has`." | |||||
def get(self, name): | def get(self, name): | ||||
"""Get the parameter whose name is *name*. | """Get the parameter whose name is *name*. | ||||
The returned object is a :py:class:`~.Parameter` instance. Raises | |||||
:py:exc:`ValueError` if no parameter has this name. Since multiple | |||||
The returned object is a :class:`.Parameter` instance. Raises | |||||
:exc:`ValueError` if no parameter has this name. Since multiple | |||||
parameters can have the same name, we'll return the last match, since | parameters can have the same name, we'll return the last match, since | ||||
the last parameter is the only one read by the MediaWiki parser. | the last parameter is the only one read by the MediaWiki parser. | ||||
""" | """ | ||||
@@ -208,9 +209,9 @@ class Template(Node): | |||||
preserve_spacing=True): | preserve_spacing=True): | ||||
"""Add a parameter to the template with a given *name* and *value*. | """Add a parameter to the template with a given *name* and *value*. | ||||
*name* and *value* can be anything parasable by | |||||
:py:func:`.utils.parse_anything`; pipes and equal signs are | |||||
automatically escaped from *value* when appropriate. | |||||
*name* and *value* can be anything parsable by | |||||
:func:`.utils.parse_anything`; pipes and equal signs are automatically | |||||
escaped from *value* when appropriate. | |||||
If *showkey* is given, this will determine whether or not to show the | If *showkey* is given, this will determine whether or not to show the | ||||
parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of | parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of | ||||
@@ -220,13 +221,13 @@ class Template(Node): | |||||
If *name* is already a parameter in the template, we'll replace its | If *name* is already a parameter in the template, we'll replace its | ||||
value while keeping the same whitespace around it. We will also try to | value while keeping the same whitespace around it. We will also try to | ||||
guess the dominant spacing convention when adding a new parameter using | guess the dominant spacing convention when adding a new parameter using | ||||
:py:meth:`_get_spacing_conventions`. | |||||
:meth:`_get_spacing_conventions`. | |||||
If *before* is given (either a :py:class:`~.Parameter` object or a | |||||
name), then we will place the parameter immediately before this one. | |||||
If *before* is given (either a :class:`.Parameter` object or a name), | |||||
then we will place the parameter immediately before this one. | |||||
Otherwise, it will be added at the end. If *before* is a name and | Otherwise, it will be added at the end. If *before* is a name and | ||||
exists multiple times in the template, we will place it before the last | exists multiple times in the template, we will place it before the last | ||||
occurance. If *before* is not in the template, :py:exc:`ValueError` is | |||||
occurrence. If *before* is not in the template, :exc:`ValueError` is | |||||
raised. The argument is ignored if the new parameter already exists. | raised. The argument is ignored if the new parameter already exists. | ||||
If *preserve_spacing* is ``False``, we will avoid preserving spacing | If *preserve_spacing* is ``False``, we will avoid preserving spacing | ||||
@@ -254,21 +255,19 @@ class Template(Node): | |||||
return existing | return existing | ||||
if showkey is None: | if showkey is None: | ||||
try: | |||||
if Parameter.can_hide_key(name): | |||||
int_name = int(str(name)) | int_name = int(str(name)) | ||||
except ValueError: | |||||
showkey = True | |||||
else: | |||||
int_keys = set() | int_keys = set() | ||||
for param in self.params: | for param in self.params: | ||||
if not param.showkey: | if not param.showkey: | ||||
if re.match(r"[1-9][0-9]*$", param.name.strip()): | |||||
int_keys.add(int(str(param.name))) | |||||
int_keys.add(int(str(param.name))) | |||||
expected = min(set(range(1, len(int_keys) + 2)) - int_keys) | expected = min(set(range(1, len(int_keys) + 2)) - int_keys) | ||||
if expected == int_name: | if expected == int_name: | ||||
showkey = False | showkey = False | ||||
else: | else: | ||||
showkey = True | showkey = True | ||||
else: | |||||
showkey = True | |||||
if not showkey: | if not showkey: | ||||
self._surface_escape(value, "=") | self._surface_escape(value, "=") | ||||
@@ -290,9 +289,9 @@ class Template(Node): | |||||
def remove(self, param, keep_field=False): | def remove(self, param, keep_field=False): | ||||
"""Remove a parameter from the template, identified by *param*. | """Remove a parameter from the template, identified by *param*. | ||||
If *param* is a :py:class:`.Parameter` object, it will be matched | |||||
exactly, otherwise it will be treated like the *name* argument to | |||||
:py:meth:`has` and :py:meth:`get`. | |||||
If *param* is a :class:`.Parameter` object, it will be matched exactly, | |||||
otherwise it will be treated like the *name* argument to :meth:`has` | |||||
and :meth:`get`. | |||||
If *keep_field* is ``True``, we will keep the parameter's name, but | If *keep_field* is ``True``, we will keep the parameter's name, but | ||||
blank its value. Otherwise, we will remove the parameter completely | blank its value. Otherwise, we will remove the parameter completely | ||||
@@ -301,7 +300,7 @@ class Template(Node): | |||||
we expected, so ``{{foo||baz}}`` will be produced instead). | we expected, so ``{{foo||baz}}`` will be produced instead). | ||||
If the parameter shows up multiple times in the template and *param* is | If the parameter shows up multiple times in the template and *param* is | ||||
not a :py:class:`.Parameter` object, we will remove all instances of it | |||||
not a :class:`.Parameter` object, we will remove all instances of it | |||||
(and keep only one if *keep_field* is ``True`` - the first instance if | (and keep only one if *keep_field* is ``True`` - the first instance if | ||||
none have dependents, otherwise the one with dependents will be kept). | none have dependents, otherwise the one with dependents will be kept). | ||||
""" | """ | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -62,12 +62,12 @@ class Wikilink(Node): | |||||
@property | @property | ||||
def title(self): | def title(self): | ||||
"""The title of the linked page, as a :py:class:`~.Wikicode` object.""" | |||||
"""The title of the linked page, as a :class:`.Wikicode` object.""" | |||||
return self._title | return self._title | ||||
@property | @property | ||||
def text(self): | def text(self): | ||||
"""The text to display (if any), as a :py:class:`~.Wikicode` object.""" | |||||
"""The text to display (if any), as a :class:`.Wikicode` object.""" | |||||
return self._text | return self._text | ||||
@title.setter | @title.setter | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -22,10 +22,23 @@ | |||||
""" | """ | ||||
This package contains the actual wikicode parser, split up into two main | This package contains the actual wikicode parser, split up into two main | ||||
modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module | |||||
joins them together under one interface. | |||||
modules: the :mod:`.tokenizer` and the :mod:`.builder`. This module joins them | |||||
together into one interface. | |||||
""" | """ | ||||
class ParserError(Exception): | |||||
"""Exception raised when an internal error occurs while parsing. | |||||
This does not mean that the wikicode was invalid, because invalid markup | |||||
should still be parsed correctly. This means that the parser caught itself | |||||
with an impossible internal state and is bailing out before other problems | |||||
can happen. Its appearance indicates a bug. | |||||
""" | |||||
def __init__(self, extra): | |||||
msg = "This is a bug and should be reported. Info: {0}.".format(extra) | |||||
super(ParserError, self).__init__(msg) | |||||
from .builder import Builder | from .builder import Builder | ||||
from .tokenizer import Tokenizer | from .tokenizer import Tokenizer | ||||
try: | try: | ||||
@@ -35,15 +48,22 @@ except ImportError: | |||||
CTokenizer = None | CTokenizer = None | ||||
use_c = False | use_c = False | ||||
__all__ = ["use_c", "Parser"] | |||||
__all__ = ["use_c", "Parser", "ParserError"] | |||||
class Parser(object): | class Parser(object): | ||||
"""Represents a parser for wikicode. | """Represents a parser for wikicode. | ||||
Actual parsing is a two-step process: first, the text is split up into a | Actual parsing is a two-step process: first, the text is split up into a | ||||
series of tokens by the :py:class:`~.Tokenizer`, and then the tokens are | |||||
converted into trees of :py:class:`~.Wikicode` objects and | |||||
:py:class:`~.Node`\ s by the :py:class:`~.Builder`. | |||||
series of tokens by the :class:`.Tokenizer`, and then the tokens are | |||||
converted into trees of :class:`.Wikicode` objects and :class:`.Node`\ s by | |||||
the :class:`.Builder`. | |||||
Instances of this class or its dependents (:class:`.Tokenizer` and | |||||
:class:`.Builder`) should not be shared between threads. :meth:`parse` can | |||||
be called multiple times as long as it is not done concurrently. In | |||||
general, there is no need to do this because parsing should be done through | |||||
:func:`mwparserfromhell.parse`, which creates a new :class:`.Parser` object | |||||
as necessary. | |||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
@@ -54,10 +74,20 @@ class Parser(object): | |||||
self._builder = Builder() | self._builder = Builder() | ||||
def parse(self, text, context=0, skip_style_tags=False): | def parse(self, text, context=0, skip_style_tags=False): | ||||
"""Parse *text*, returning a :py:class:`~.Wikicode` object tree. | |||||
"""Parse *text*, returning a :class:`.Wikicode` object tree. | |||||
If given, *context* will be passed as a starting context to the parser. | |||||
This is helpful when this function is used inside node attribute | |||||
setters. For example, :class:`.ExternalLink`\ 's | |||||
:attr:`~.ExternalLink.url` setter sets *context* to | |||||
:mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself | |||||
from becoming an :class:`.ExternalLink`. | |||||
If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be | If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be | ||||
parsed, but instead be treated as plain text. | |||||
parsed, but instead will be treated as plain text. | |||||
If there is an internal error while parsing, :exc:`.ParserError` will | |||||
be raised. | |||||
""" | """ | ||||
tokens = self._tokenizer.tokenize(text, context, skip_style_tags) | tokens = self._tokenizer.tokenize(text, context, skip_style_tags) | ||||
code = self._builder.build(tokens) | code = self._builder.build(tokens) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -22,7 +22,7 @@ | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from . import tokens | |||||
from . import tokens, ParserError | |||||
from ..compat import str | from ..compat import str | ||||
from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, | from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, | ||||
Template, Text, Wikilink) | Template, Text, Wikilink) | ||||
@@ -32,34 +32,42 @@ from ..wikicode import Wikicode | |||||
__all__ = ["Builder"] | __all__ = ["Builder"] | ||||
_HANDLERS = { | |||||
tokens.Text: lambda self, token: Text(token.text) | |||||
} | |||||
def _add_handler(token_type): | |||||
"""Create a decorator that adds a handler function to the lookup table.""" | |||||
def decorator(func): | |||||
"""Add a handler function to the lookup table.""" | |||||
_HANDLERS[token_type] = func | |||||
return func | |||||
return decorator | |||||
class Builder(object): | class Builder(object): | ||||
"""Combines a sequence of tokens into a tree of ``Wikicode`` objects. | |||||
"""Builds a tree of nodes out of a sequence of tokens. | |||||
To use, pass a list of :py:class:`~.Token`\ s to the :py:meth:`build` | |||||
method. The list will be exhausted as it is parsed and a | |||||
:py:class:`~.Wikicode` object will be returned. | |||||
To use, pass a list of :class:`.Token`\ s to the :meth:`build` method. The | |||||
list will be exhausted as it is parsed and a :class:`.Wikicode` object | |||||
containing the node tree will be returned. | |||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
self._tokens = [] | self._tokens = [] | ||||
self._stacks = [] | self._stacks = [] | ||||
def _wrap(self, nodes): | |||||
"""Properly wrap a list of nodes in a ``Wikicode`` object.""" | |||||
return Wikicode(SmartList(nodes)) | |||||
def _push(self): | def _push(self): | ||||
"""Push a new node list onto the stack.""" | """Push a new node list onto the stack.""" | ||||
self._stacks.append([]) | self._stacks.append([]) | ||||
def _pop(self, wrap=True): | |||||
def _pop(self): | |||||
"""Pop the current node list off of the stack. | """Pop the current node list off of the stack. | ||||
If *wrap* is ``True``, we will call :py:meth:`_wrap` on the list. | |||||
The raw node list is wrapped in a :class:`.SmartList` and then in a | |||||
:class:`.Wikicode` object. | |||||
""" | """ | ||||
if wrap: | |||||
return self._wrap(self._stacks.pop()) | |||||
return self._stacks.pop() | |||||
return Wikicode(SmartList(self._stacks.pop())) | |||||
def _write(self, item): | def _write(self, item): | ||||
"""Append a node to the current node list.""" | """Append a node to the current node list.""" | ||||
@@ -84,12 +92,14 @@ class Builder(object): | |||||
self._tokens.append(token) | self._tokens.append(token) | ||||
value = self._pop() | value = self._pop() | ||||
if key is None: | if key is None: | ||||
key = self._wrap([Text(str(default))]) | |||||
key = Wikicode(SmartList([Text(str(default))])) | |||||
return Parameter(key, value, showkey) | return Parameter(key, value, showkey) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_parameter() missed a close token") | |||||
def _handle_template(self): | |||||
@_add_handler(tokens.TemplateOpen) | |||||
def _handle_template(self, token): | |||||
"""Handle a case where a template is at the head of the tokens.""" | """Handle a case where a template is at the head of the tokens.""" | ||||
params = [] | params = [] | ||||
default = 1 | default = 1 | ||||
@@ -109,8 +119,10 @@ class Builder(object): | |||||
return Template(name, params) | return Template(name, params) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_template() missed a close token") | |||||
def _handle_argument(self): | |||||
@_add_handler(tokens.ArgumentOpen) | |||||
def _handle_argument(self, token): | |||||
"""Handle a case where an argument is at the head of the tokens.""" | """Handle a case where an argument is at the head of the tokens.""" | ||||
name = None | name = None | ||||
self._push() | self._push() | ||||
@@ -125,8 +137,10 @@ class Builder(object): | |||||
return Argument(self._pop()) | return Argument(self._pop()) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_argument() missed a close token") | |||||
def _handle_wikilink(self): | |||||
@_add_handler(tokens.WikilinkOpen) | |||||
def _handle_wikilink(self, token): | |||||
"""Handle a case where a wikilink is at the head of the tokens.""" | """Handle a case where a wikilink is at the head of the tokens.""" | ||||
title = None | title = None | ||||
self._push() | self._push() | ||||
@@ -141,7 +155,9 @@ class Builder(object): | |||||
return Wikilink(self._pop()) | return Wikilink(self._pop()) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_wikilink() missed a close token") | |||||
@_add_handler(tokens.ExternalLinkOpen) | |||||
def _handle_external_link(self, token): | def _handle_external_link(self, token): | ||||
"""Handle when an external link is at the head of the tokens.""" | """Handle when an external link is at the head of the tokens.""" | ||||
brackets, url = token.brackets, None | brackets, url = token.brackets, None | ||||
@@ -157,8 +173,10 @@ class Builder(object): | |||||
return ExternalLink(self._pop(), brackets=brackets) | return ExternalLink(self._pop(), brackets=brackets) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_external_link() missed a close token") | |||||
def _handle_entity(self): | |||||
@_add_handler(tokens.HTMLEntityStart) | |||||
def _handle_entity(self, token): | |||||
"""Handle a case where an HTML entity is at the head of the tokens.""" | """Handle a case where an HTML entity is at the head of the tokens.""" | ||||
token = self._tokens.pop() | token = self._tokens.pop() | ||||
if isinstance(token, tokens.HTMLEntityNumeric): | if isinstance(token, tokens.HTMLEntityNumeric): | ||||
@@ -173,6 +191,7 @@ class Builder(object): | |||||
self._tokens.pop() # Remove HTMLEntityEnd | self._tokens.pop() # Remove HTMLEntityEnd | ||||
return HTMLEntity(token.text, named=True, hexadecimal=False) | return HTMLEntity(token.text, named=True, hexadecimal=False) | ||||
@_add_handler(tokens.HeadingStart) | |||||
def _handle_heading(self, token): | def _handle_heading(self, token): | ||||
"""Handle a case where a heading is at the head of the tokens.""" | """Handle a case where a heading is at the head of the tokens.""" | ||||
level = token.level | level = token.level | ||||
@@ -184,8 +203,10 @@ class Builder(object): | |||||
return Heading(title, level) | return Heading(title, level) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_heading() missed a close token") | |||||
def _handle_comment(self): | |||||
@_add_handler(tokens.CommentStart) | |||||
def _handle_comment(self, token): | |||||
"""Handle a case where an HTML comment is at the head of the tokens.""" | """Handle a case where an HTML comment is at the head of the tokens.""" | ||||
self._push() | self._push() | ||||
while self._tokens: | while self._tokens: | ||||
@@ -195,10 +216,11 @@ class Builder(object): | |||||
return Comment(contents) | return Comment(contents) | ||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_comment() missed a close token") | |||||
def _handle_attribute(self, start): | def _handle_attribute(self, start): | ||||
"""Handle a case where a tag attribute is at the head of the tokens.""" | """Handle a case where a tag attribute is at the head of the tokens.""" | ||||
name, quoted = None, False | |||||
name = quotes = None | |||||
self._push() | self._push() | ||||
while self._tokens: | while self._tokens: | ||||
token = self._tokens.pop() | token = self._tokens.pop() | ||||
@@ -206,7 +228,7 @@ class Builder(object): | |||||
name = self._pop() | name = self._pop() | ||||
self._push() | self._push() | ||||
elif isinstance(token, tokens.TagAttrQuote): | elif isinstance(token, tokens.TagAttrQuote): | ||||
quoted = True | |||||
quotes = token.char | |||||
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, | elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, | ||||
tokens.TagCloseSelfclose)): | tokens.TagCloseSelfclose)): | ||||
self._tokens.append(token) | self._tokens.append(token) | ||||
@@ -214,30 +236,37 @@ class Builder(object): | |||||
value = self._pop() | value = self._pop() | ||||
else: | else: | ||||
name, value = self._pop(), None | name, value = self._pop(), None | ||||
return Attribute(name, value, quoted, start.pad_first, | |||||
start.pad_before_eq, start.pad_after_eq) | |||||
return Attribute(name, value, quotes, start.pad_first, | |||||
start.pad_before_eq, start.pad_after_eq, | |||||
check_quotes=False) | |||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_attribute() missed a close token") | |||||
@_add_handler(tokens.TagOpenOpen) | |||||
def _handle_tag(self, token): | def _handle_tag(self, token): | ||||
"""Handle a case where a tag is at the head of the tokens.""" | """Handle a case where a tag is at the head of the tokens.""" | ||||
close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) | close_tokens = (tokens.TagCloseSelfclose, tokens.TagCloseClose) | ||||
implicit, attrs, contents, closing_tag = False, [], None, None | implicit, attrs, contents, closing_tag = False, [], None, None | ||||
wiki_markup, invalid = token.wiki_markup, token.invalid or False | wiki_markup, invalid = token.wiki_markup, token.invalid or False | ||||
wiki_style_separator, closing_wiki_markup = None, wiki_markup | |||||
self._push() | self._push() | ||||
while self._tokens: | while self._tokens: | ||||
token = self._tokens.pop() | token = self._tokens.pop() | ||||
if isinstance(token, tokens.TagAttrStart): | if isinstance(token, tokens.TagAttrStart): | ||||
attrs.append(self._handle_attribute(token)) | attrs.append(self._handle_attribute(token)) | ||||
elif isinstance(token, tokens.TagCloseOpen): | elif isinstance(token, tokens.TagCloseOpen): | ||||
wiki_style_separator = token.wiki_markup | |||||
padding = token.padding or "" | padding = token.padding or "" | ||||
tag = self._pop() | tag = self._pop() | ||||
self._push() | self._push() | ||||
elif isinstance(token, tokens.TagOpenClose): | elif isinstance(token, tokens.TagOpenClose): | ||||
closing_wiki_markup = token.wiki_markup | |||||
contents = self._pop() | contents = self._pop() | ||||
self._push() | self._push() | ||||
elif isinstance(token, close_tokens): | elif isinstance(token, close_tokens): | ||||
if isinstance(token, tokens.TagCloseSelfclose): | if isinstance(token, tokens.TagCloseSelfclose): | ||||
closing_wiki_markup = token.wiki_markup | |||||
tag = self._pop() | tag = self._pop() | ||||
self_closing = True | self_closing = True | ||||
padding = token.padding or "" | padding = token.padding or "" | ||||
@@ -246,30 +275,19 @@ class Builder(object): | |||||
self_closing = False | self_closing = False | ||||
closing_tag = self._pop() | closing_tag = self._pop() | ||||
return Tag(tag, contents, attrs, wiki_markup, self_closing, | return Tag(tag, contents, attrs, wiki_markup, self_closing, | ||||
invalid, implicit, padding, closing_tag) | |||||
invalid, implicit, padding, closing_tag, | |||||
wiki_style_separator, closing_wiki_markup) | |||||
else: | else: | ||||
self._write(self._handle_token(token)) | self._write(self._handle_token(token)) | ||||
raise ParserError("_handle_tag() missed a close token") | |||||
def _handle_token(self, token): | def _handle_token(self, token): | ||||
"""Handle a single token.""" | """Handle a single token.""" | ||||
if isinstance(token, tokens.Text): | |||||
return Text(token.text) | |||||
elif isinstance(token, tokens.TemplateOpen): | |||||
return self._handle_template() | |||||
elif isinstance(token, tokens.ArgumentOpen): | |||||
return self._handle_argument() | |||||
elif isinstance(token, tokens.WikilinkOpen): | |||||
return self._handle_wikilink() | |||||
elif isinstance(token, tokens.ExternalLinkOpen): | |||||
return self._handle_external_link(token) | |||||
elif isinstance(token, tokens.HTMLEntityStart): | |||||
return self._handle_entity() | |||||
elif isinstance(token, tokens.HeadingStart): | |||||
return self._handle_heading(token) | |||||
elif isinstance(token, tokens.CommentStart): | |||||
return self._handle_comment() | |||||
elif isinstance(token, tokens.TagOpenOpen): | |||||
return self._handle_tag(token) | |||||
try: | |||||
return _HANDLERS[type(token)](self, token) | |||||
except KeyError: | |||||
err = "_handle_token() got unexpected {0}" | |||||
raise ParserError(err.format(type(token).__name__)) | |||||
def build(self, tokenlist): | def build(self, tokenlist): | ||||
"""Build a Wikicode object from a list tokens and return it.""" | """Build a Wikicode object from a list tokens and return it.""" | ||||
@@ -280,3 +298,6 @@ class Builder(object): | |||||
node = self._handle_token(self._tokens.pop()) | node = self._handle_token(self._tokens.pop()) | ||||
self._write(node) | self._write(node) | ||||
return self._pop() | return self._pop() | ||||
del _add_handler |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -35,72 +35,81 @@ will cover ``BAR == 0b10`` and ``BAZ == 0b01``). | |||||
Local (stack-specific) contexts: | Local (stack-specific) contexts: | ||||
* :py:const:`TEMPLATE` | |||||
* :const:`TEMPLATE` | |||||
* :py:const:`TEMPLATE_NAME` | |||||
* :py:const:`TEMPLATE_PARAM_KEY` | |||||
* :py:const:`TEMPLATE_PARAM_VALUE` | |||||
* :const:`TEMPLATE_NAME` | |||||
* :const:`TEMPLATE_PARAM_KEY` | |||||
* :const:`TEMPLATE_PARAM_VALUE` | |||||
* :py:const:`ARGUMENT` | |||||
* :const:`ARGUMENT` | |||||
* :py:const:`ARGUMENT_NAME` | |||||
* :py:const:`ARGUMENT_DEFAULT` | |||||
* :const:`ARGUMENT_NAME` | |||||
* :const:`ARGUMENT_DEFAULT` | |||||
* :py:const:`WIKILINK` | |||||
* :const:`WIKILINK` | |||||
* :py:const:`WIKILINK_TITLE` | |||||
* :py:const:`WIKILINK_TEXT` | |||||
* :const:`WIKILINK_TITLE` | |||||
* :const:`WIKILINK_TEXT` | |||||
* :py:const:`EXT_LINK` | |||||
* :const:`EXT_LINK` | |||||
* :py:const:`EXT_LINK_URI` | |||||
* :py:const:`EXT_LINK_TITLE` | |||||
* :const:`EXT_LINK_URI` | |||||
* :const:`EXT_LINK_TITLE` | |||||
* :py:const:`HEADING` | |||||
* :const:`HEADING` | |||||
* :py:const:`HEADING_LEVEL_1` | |||||
* :py:const:`HEADING_LEVEL_2` | |||||
* :py:const:`HEADING_LEVEL_3` | |||||
* :py:const:`HEADING_LEVEL_4` | |||||
* :py:const:`HEADING_LEVEL_5` | |||||
* :py:const:`HEADING_LEVEL_6` | |||||
* :const:`HEADING_LEVEL_1` | |||||
* :const:`HEADING_LEVEL_2` | |||||
* :const:`HEADING_LEVEL_3` | |||||
* :const:`HEADING_LEVEL_4` | |||||
* :const:`HEADING_LEVEL_5` | |||||
* :const:`HEADING_LEVEL_6` | |||||
* :py:const:`TAG` | |||||
* :const:`TAG` | |||||
* :py:const:`TAG_OPEN` | |||||
* :py:const:`TAG_ATTR` | |||||
* :py:const:`TAG_BODY` | |||||
* :py:const:`TAG_CLOSE` | |||||
* :const:`TAG_OPEN` | |||||
* :const:`TAG_ATTR` | |||||
* :const:`TAG_BODY` | |||||
* :const:`TAG_CLOSE` | |||||
* :py:const:`STYLE` | |||||
* :const:`STYLE` | |||||
* :py:const:`STYLE_ITALICS` | |||||
* :py:const:`STYLE_BOLD` | |||||
* :py:const:`STYLE_PASS_AGAIN` | |||||
* :py:const:`STYLE_SECOND_PASS` | |||||
* :const:`STYLE_ITALICS` | |||||
* :const:`STYLE_BOLD` | |||||
* :const:`STYLE_PASS_AGAIN` | |||||
* :const:`STYLE_SECOND_PASS` | |||||
* :py:const:`DL_TERM` | |||||
* :const:`DL_TERM` | |||||
* :py:const:`SAFETY_CHECK` | |||||
* :const:`SAFETY_CHECK` | |||||
* :py:const:`HAS_TEXT` | |||||
* :py:const:`FAIL_ON_TEXT` | |||||
* :py:const:`FAIL_NEXT` | |||||
* :py:const:`FAIL_ON_LBRACE` | |||||
* :py:const:`FAIL_ON_RBRACE` | |||||
* :py:const:`FAIL_ON_EQUALS` | |||||
* :const:`HAS_TEXT` | |||||
* :const:`FAIL_ON_TEXT` | |||||
* :const:`FAIL_NEXT` | |||||
* :const:`FAIL_ON_LBRACE` | |||||
* :const:`FAIL_ON_RBRACE` | |||||
* :const:`FAIL_ON_EQUALS` | |||||
* :const:`TABLE` | |||||
* :const:`TABLE_OPEN` | |||||
* :const:`TABLE_CELL_OPEN` | |||||
* :const:`TABLE_CELL_STYLE` | |||||
* :const:`TABLE_TD_LINE` | |||||
* :const:`TABLE_TH_LINE` | |||||
* :const:`TABLE_CELL_LINE_CONTEXTS` | |||||
Global contexts: | Global contexts: | ||||
* :py:const:`GL_HEADING` | |||||
* :const:`GL_HEADING` | |||||
Aggregate contexts: | Aggregate contexts: | ||||
* :py:const:`FAIL` | |||||
* :py:const:`UNSAFE` | |||||
* :py:const:`DOUBLE` | |||||
* :py:const:`NO_WIKILINKS` | |||||
* :py:const:`NO_EXT_LINKS` | |||||
* :const:`FAIL` | |||||
* :const:`UNSAFE` | |||||
* :const:`DOUBLE` | |||||
* :const:`NO_WIKILINKS` | |||||
* :const:`NO_EXT_LINKS` | |||||
""" | """ | ||||
@@ -155,15 +164,26 @@ FAIL_ON_EQUALS = 1 << 29 | |||||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | ||||
FAIL_ON_RBRACE + FAIL_ON_EQUALS) | FAIL_ON_RBRACE + FAIL_ON_EQUALS) | ||||
TABLE_OPEN = 1 << 30 | |||||
TABLE_CELL_OPEN = 1 << 31 | |||||
TABLE_CELL_STYLE = 1 << 32 | |||||
TABLE_ROW_OPEN = 1 << 33 | |||||
TABLE_TD_LINE = 1 << 34 | |||||
TABLE_TH_LINE = 1 << 35 | |||||
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE | |||||
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + | |||||
TABLE_TD_LINE + TABLE_TH_LINE) | |||||
# Global contexts: | # Global contexts: | ||||
GL_HEADING = 1 << 0 | GL_HEADING = 1 << 0 | ||||
# Aggregate contexts: | # Aggregate contexts: | ||||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE | |||||
FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + | |||||
STYLE + TABLE) | |||||
UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + | UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + | ||||
TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) | TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) | ||||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE | |||||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN | |||||
NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI | NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI | ||||
NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK | NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK |
@@ -1,6 +1,6 @@ | |||||
/* | /* | ||||
Tokenizer for MWParserFromHell | Tokenizer for MWParserFromHell | ||||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -69,15 +69,19 @@ static int call_def_func(const char* funcname, PyObject* in1, PyObject* in2, | |||||
/* | /* | ||||
Sanitize the name of a tag so it can be compared with others for equality. | Sanitize the name of a tag so it can be compared with others for equality. | ||||
*/ | */ | ||||
static PyObject* strip_tag_name(PyObject* token) | |||||
static PyObject* strip_tag_name(PyObject* token, int take_attr) | |||||
{ | { | ||||
PyObject *text, *rstripped, *lowered; | PyObject *text, *rstripped, *lowered; | ||||
text = PyObject_GetAttrString(token, "text"); | |||||
if (!text) | |||||
return NULL; | |||||
rstripped = PyObject_CallMethod(text, "rstrip", NULL); | |||||
Py_DECREF(text); | |||||
if (take_attr) { | |||||
text = PyObject_GetAttrString(token, "text"); | |||||
if (!text) | |||||
return NULL; | |||||
rstripped = PyObject_CallMethod(text, "rstrip", NULL); | |||||
Py_DECREF(text); | |||||
} | |||||
else | |||||
rstripped = PyObject_CallMethod(token, "rstrip", NULL); | |||||
if (!rstripped) | if (!rstripped) | ||||
return NULL; | return NULL; | ||||
lowered = PyObject_CallMethod(rstripped, "lower", NULL); | lowered = PyObject_CallMethod(rstripped, "lower", NULL); | ||||
@@ -173,7 +177,7 @@ static TagData* TagData_new(void) | |||||
ALLOC_BUFFER(self->pad_first) | ALLOC_BUFFER(self->pad_first) | ||||
ALLOC_BUFFER(self->pad_before_eq) | ALLOC_BUFFER(self->pad_before_eq) | ||||
ALLOC_BUFFER(self->pad_after_eq) | ALLOC_BUFFER(self->pad_after_eq) | ||||
self->reset = 0; | |||||
self->quoter = self->reset = 0; | |||||
return self; | return self; | ||||
} | } | ||||
@@ -241,7 +245,7 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||||
/* | /* | ||||
Add a new token stack, context, and textbuffer to the list. | Add a new token stack, context, and textbuffer to the list. | ||||
*/ | */ | ||||
static int Tokenizer_push(Tokenizer* self, int context) | |||||
static int Tokenizer_push(Tokenizer* self, uint64_t context) | |||||
{ | { | ||||
Stack* top = malloc(sizeof(Stack)); | Stack* top = malloc(sizeof(Stack)); | ||||
@@ -333,7 +337,7 @@ static PyObject* Tokenizer_pop(Tokenizer* self) | |||||
static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | ||||
{ | { | ||||
PyObject* stack; | PyObject* stack; | ||||
int context; | |||||
uint64_t context; | |||||
if (Tokenizer_push_textbuffer(self)) | if (Tokenizer_push_textbuffer(self)) | ||||
return NULL; | return NULL; | ||||
@@ -347,11 +351,11 @@ static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||||
/* | /* | ||||
Fail the current tokenization route. Discards the current | Fail the current tokenization route. Discards the current | ||||
stack/context/textbuffer and raises a BadRoute exception. | |||||
stack/context/textbuffer and sets the BAD_ROUTE flag. | |||||
*/ | */ | ||||
static void* Tokenizer_fail_route(Tokenizer* self) | static void* Tokenizer_fail_route(Tokenizer* self) | ||||
{ | { | ||||
int context = self->topstack->context; | |||||
uint64_t context = self->topstack->context; | |||||
PyObject* stack = Tokenizer_pop(self); | PyObject* stack = Tokenizer_pop(self); | ||||
Py_XDECREF(stack); | Py_XDECREF(stack); | ||||
@@ -676,11 +680,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) | |||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
for (i = 0; i < braces; i++) text[i] = '{'; | for (i = 0; i < braces; i++) text[i] = '{'; | ||||
text[braces] = '\0'; | text[braces] = '\0'; | ||||
if (Tokenizer_emit_text_then_stack(self, text)) { | |||||
Py_XDECREF(text); | |||||
if (Tokenizer_emit_text_then_stack(self, text)) | |||||
return -1; | return -1; | ||||
} | |||||
Py_XDECREF(text); | |||||
return 0; | return 0; | ||||
} | } | ||||
else | else | ||||
@@ -832,8 +833,6 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) | |||||
Py_DECREF(wikilink); | Py_DECREF(wikilink); | ||||
if (Tokenizer_emit(self, WikilinkClose)) | if (Tokenizer_emit(self, WikilinkClose)) | ||||
return -1; | return -1; | ||||
if (self->topstack->context & LC_FAIL_NEXT) | |||||
self->topstack->context ^= LC_FAIL_NEXT; | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -1036,7 +1035,7 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) | |||||
{ | { | ||||
// Built from Tokenizer_parse()'s end sentinels: | // Built from Tokenizer_parse()'s end sentinels: | ||||
Py_UNICODE after = Tokenizer_READ(self, 2); | Py_UNICODE after = Tokenizer_READ(self, 2); | ||||
int ctx = self->topstack->context; | |||||
uint64_t ctx = self->topstack->context; | |||||
return (!this || this == '\n' || this == '[' || this == ']' || | return (!this || this == '\n' || this == '[' || this == ']' || | ||||
this == '<' || this == '>' || (this == '\'' && next == '\'') || | this == '<' || this == '>' || (this == '\'' && next == '\'') || | ||||
@@ -1555,6 +1554,12 @@ static int Tokenizer_parse_comment(Tokenizer* self) | |||||
return -1; | return -1; | ||||
Py_DECREF(comment); | Py_DECREF(comment); | ||||
self->head += 2; | self->head += 2; | ||||
if (self->topstack->context & LC_FAIL_NEXT) { | |||||
/* _verify_safe() sets this flag while parsing a template name | |||||
when it encounters what might be a comment -- we must unset | |||||
it to let _verify_safe() know it was correct: */ | |||||
self->topstack->context ^= LC_FAIL_NEXT; | |||||
} | |||||
return 0; | return 0; | ||||
} | } | ||||
if (Tokenizer_emit_char(self, this)) | if (Tokenizer_emit_char(self, this)) | ||||
@@ -1568,10 +1573,18 @@ static int Tokenizer_parse_comment(Tokenizer* self) | |||||
*/ | */ | ||||
static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) | static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) | ||||
{ | { | ||||
PyObject *tokens, *kwargs, *pad_first, *pad_before_eq, *pad_after_eq; | |||||
PyObject *tokens, *kwargs, *tmp, *pad_first, *pad_before_eq, *pad_after_eq; | |||||
if (data->context & TAG_QUOTED) { | if (data->context & TAG_QUOTED) { | ||||
if (Tokenizer_emit_first(self, TagAttrQuote)) | |||||
kwargs = PyDict_New(); | |||||
if (!kwargs) | |||||
return -1; | |||||
tmp = PyUnicode_FromUnicode(&data->quoter, 1); | |||||
if (!tmp) | |||||
return -1; | |||||
PyDict_SetItemString(kwargs, "char", tmp); | |||||
Py_DECREF(tmp); | |||||
if (Tokenizer_emit_first_kwargs(self, TagAttrQuote, kwargs)) | |||||
return -1; | return -1; | ||||
tokens = Tokenizer_pop(self); | tokens = Tokenizer_pop(self); | ||||
if (!tokens) | if (!tokens) | ||||
@@ -1617,9 +1630,9 @@ static int Tokenizer_push_tag_buffer(Tokenizer* self, TagData* data) | |||||
static int | static int | ||||
Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text) | Tokenizer_handle_tag_space(Tokenizer* self, TagData* data, Py_UNICODE text) | ||||
{ | { | ||||
int ctx = data->context; | |||||
int end_of_value = (ctx & TAG_ATTR_VALUE && | |||||
!(ctx & (TAG_QUOTED | TAG_NOTE_QUOTE))); | |||||
uint64_t ctx = data->context; | |||||
uint64_t end_of_value = (ctx & TAG_ATTR_VALUE && | |||||
!(ctx & (TAG_QUOTED | TAG_NOTE_QUOTE))); | |||||
if (end_of_value || (ctx & TAG_QUOTED && ctx & TAG_NOTE_SPACE)) { | if (end_of_value || (ctx & TAG_QUOTED && ctx & TAG_NOTE_SPACE)) { | ||||
if (Tokenizer_push_tag_buffer(self, data)) | if (Tokenizer_push_tag_buffer(self, data)) | ||||
@@ -1718,21 +1731,22 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk) | |||||
return -1; | return -1; | ||||
} | } | ||||
} | } | ||||
else if (data->context & TAG_ATTR_VALUE) { | |||||
else { // data->context & TAG_ATTR_VALUE assured | |||||
escaped = (Tokenizer_READ_BACKWARDS(self, 1) == '\\' && | escaped = (Tokenizer_READ_BACKWARDS(self, 1) == '\\' && | ||||
Tokenizer_READ_BACKWARDS(self, 2) != '\\'); | Tokenizer_READ_BACKWARDS(self, 2) != '\\'); | ||||
if (data->context & TAG_NOTE_QUOTE) { | if (data->context & TAG_NOTE_QUOTE) { | ||||
data->context ^= TAG_NOTE_QUOTE; | data->context ^= TAG_NOTE_QUOTE; | ||||
if (chunk == '"' && !escaped) { | |||||
if ((chunk == '"' || chunk == '\'') && !escaped) { | |||||
data->context |= TAG_QUOTED; | data->context |= TAG_QUOTED; | ||||
data->quoter = chunk; | |||||
data->reset = self->head; | |||||
if (Tokenizer_push(self, self->topstack->context)) | if (Tokenizer_push(self, self->topstack->context)) | ||||
return -1; | return -1; | ||||
data->reset = self->head; | |||||
return 0; | return 0; | ||||
} | } | ||||
} | } | ||||
else if (data->context & TAG_QUOTED) { | else if (data->context & TAG_QUOTED) { | ||||
if (chunk == '"' && !escaped) { | |||||
if (chunk == data->quoter && !escaped) { | |||||
data->context |= TAG_NOTE_SPACE; | data->context |= TAG_NOTE_SPACE; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -1802,8 +1816,9 @@ static PyObject* Tokenizer_handle_tag_close_close(Tokenizer* self) | |||||
valid = 0; | valid = 0; | ||||
break; | break; | ||||
case 1: { | case 1: { | ||||
so = strip_tag_name(first); | |||||
sc = strip_tag_name(PyList_GET_ITEM(self->topstack->stack, 1)); | |||||
so = strip_tag_name(first, 1); | |||||
sc = strip_tag_name( | |||||
PyList_GET_ITEM(self->topstack->stack, 1), 1); | |||||
if (so && sc) { | if (so && sc) { | ||||
if (PyUnicode_Compare(so, sc)) | if (PyUnicode_Compare(so, sc)) | ||||
valid = 0; | valid = 0; | ||||
@@ -1838,7 +1853,11 @@ static PyObject* Tokenizer_handle_tag_close_close(Tokenizer* self) | |||||
*/ | */ | ||||
static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | ||||
{ | { | ||||
Textbuffer* buffer; | |||||
PyObject *buf_tmp, *end_tag, *start_tag; | |||||
Py_UNICODE this, next; | Py_UNICODE this, next; | ||||
Py_ssize_t reset; | |||||
int cmp; | |||||
while (1) { | while (1) { | ||||
this = Tokenizer_READ(self, 0); | this = Tokenizer_READ(self, 0); | ||||
@@ -1846,10 +1865,48 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | |||||
if (!this) | if (!this) | ||||
return Tokenizer_fail_route(self); | return Tokenizer_fail_route(self); | ||||
else if (this == '<' && next == '/') { | else if (this == '<' && next == '/') { | ||||
if (Tokenizer_handle_tag_open_close(self)) | |||||
self->head += 2; | |||||
reset = self->head - 1; | |||||
buffer = Textbuffer_new(); | |||||
if (!buffer) | |||||
return NULL; | return NULL; | ||||
self->head++; | |||||
return Tokenizer_parse(self, 0, 0); | |||||
while ((this = Tokenizer_READ(self, 0))) { | |||||
if (this == '>') { | |||||
buf_tmp = Textbuffer_render(buffer); | |||||
if (!buf_tmp) | |||||
return NULL; | |||||
end_tag = strip_tag_name(buf_tmp, 0); | |||||
Py_DECREF(buf_tmp); | |||||
if (!end_tag) | |||||
return NULL; | |||||
start_tag = strip_tag_name( | |||||
PyList_GET_ITEM(self->topstack->stack, 1), 1); | |||||
if (!start_tag) | |||||
return NULL; | |||||
cmp = PyUnicode_Compare(start_tag, end_tag); | |||||
Py_DECREF(end_tag); | |||||
Py_DECREF(start_tag); | |||||
if (cmp) | |||||
goto no_matching_end; | |||||
if (Tokenizer_emit(self, TagOpenClose)) | |||||
return NULL; | |||||
if (Tokenizer_emit_textbuffer(self, buffer, 0)) | |||||
return NULL; | |||||
if (Tokenizer_emit(self, TagCloseClose)) | |||||
return NULL; | |||||
return Tokenizer_pop(self); | |||||
} | |||||
if (!this || this == '\n') { | |||||
no_matching_end: | |||||
Textbuffer_dealloc(buffer); | |||||
self->head = reset; | |||||
if (Tokenizer_emit_text(self, "</")) | |||||
return NULL; | |||||
break; | |||||
} | |||||
Textbuffer_write(&buffer, this); | |||||
self->head++; | |||||
} | |||||
} | } | ||||
else if (this == '&') { | else if (this == '&') { | ||||
if (Tokenizer_parse_entity(self)) | if (Tokenizer_parse_entity(self)) | ||||
@@ -1896,18 +1953,26 @@ static PyObject* Tokenizer_handle_single_tag_end(Tokenizer* self) | |||||
{ | { | ||||
PyObject *token = 0, *padding, *kwargs; | PyObject *token = 0, *padding, *kwargs; | ||||
Py_ssize_t len, index; | Py_ssize_t len, index; | ||||
int is_instance; | |||||
int depth = 1, is_instance; | |||||
len = PyList_GET_SIZE(self->topstack->stack); | len = PyList_GET_SIZE(self->topstack->stack); | ||||
for (index = 0; index < len; index++) { | |||||
for (index = 2; index < len; index++) { | |||||
token = PyList_GET_ITEM(self->topstack->stack, index); | token = PyList_GET_ITEM(self->topstack->stack, index); | ||||
is_instance = PyObject_IsInstance(token, TagCloseOpen); | |||||
is_instance = PyObject_IsInstance(token, TagOpenOpen); | |||||
if (is_instance == -1) | if (is_instance == -1) | ||||
return NULL; | return NULL; | ||||
else if (is_instance == 1) | else if (is_instance == 1) | ||||
break; | |||||
depth++; | |||||
is_instance = PyObject_IsInstance(token, TagCloseOpen); | |||||
if (is_instance == -1) | |||||
return NULL; | |||||
else if (is_instance == 1) { | |||||
depth--; | |||||
if (depth == 0) | |||||
break; | |||||
} | |||||
} | } | ||||
if (!token) | |||||
if (!token || depth > 0) | |||||
return NULL; | return NULL; | ||||
padding = PyObject_GetAttrString(token, "padding"); | padding = PyObject_GetAttrString(token, "padding"); | ||||
if (!padding) | if (!padding) | ||||
@@ -2132,7 +2197,7 @@ static int Tokenizer_emit_style_tag(Tokenizer* self, const char* tag, | |||||
static int Tokenizer_parse_italics(Tokenizer* self) | static int Tokenizer_parse_italics(Tokenizer* self) | ||||
{ | { | ||||
Py_ssize_t reset = self->head; | Py_ssize_t reset = self->head; | ||||
int context; | |||||
uint64_t context; | |||||
PyObject *stack; | PyObject *stack; | ||||
stack = Tokenizer_parse(self, LC_STYLE_ITALICS, 1); | stack = Tokenizer_parse(self, LC_STYLE_ITALICS, 1); | ||||
@@ -2252,7 +2317,7 @@ static int Tokenizer_parse_italics_and_bold(Tokenizer* self) | |||||
*/ | */ | ||||
static PyObject* Tokenizer_parse_style(Tokenizer* self) | static PyObject* Tokenizer_parse_style(Tokenizer* self) | ||||
{ | { | ||||
int context = self->topstack->context, ticks = 2, i; | |||||
uint64_t context = self->topstack->context, ticks = 2, i; | |||||
self->head += 2; | self->head += 2; | ||||
while (Tokenizer_READ(self, 0) == '\'') { | while (Tokenizer_READ(self, 0) == '\'') { | ||||
@@ -2405,9 +2470,363 @@ static int Tokenizer_handle_dl_term(Tokenizer* self) | |||||
} | } | ||||
/* | /* | ||||
Emit a table tag. | |||||
*/ | |||||
static int | |||||
Tokenizer_emit_table_tag(Tokenizer* self, const char* open_open_markup, | |||||
const char* tag, PyObject* style, PyObject* padding, | |||||
const char* close_open_markup, PyObject* contents, | |||||
const char* open_close_markup) | |||||
{ | |||||
PyObject *open_open_kwargs, *open_open_markup_unicode, *close_open_kwargs, | |||||
*close_open_markup_unicode, *open_close_kwargs, | |||||
*open_close_markup_unicode; | |||||
open_open_kwargs = PyDict_New(); | |||||
if (!open_open_kwargs) | |||||
goto fail_decref_all; | |||||
open_open_markup_unicode = PyUnicode_FromString(open_open_markup); | |||||
if (!open_open_markup_unicode) { | |||||
Py_DECREF(open_open_kwargs); | |||||
goto fail_decref_all; | |||||
} | |||||
PyDict_SetItemString(open_open_kwargs, "wiki_markup", | |||||
open_open_markup_unicode); | |||||
Py_DECREF(open_open_markup_unicode); | |||||
if (Tokenizer_emit_kwargs(self, TagOpenOpen, open_open_kwargs)) | |||||
goto fail_decref_all; | |||||
if (Tokenizer_emit_text(self, tag)) | |||||
goto fail_decref_all; | |||||
if (style) { | |||||
if (Tokenizer_emit_all(self, style)) | |||||
goto fail_decref_all; | |||||
Py_DECREF(style); | |||||
} | |||||
close_open_kwargs = PyDict_New(); | |||||
if (!close_open_kwargs) | |||||
goto fail_decref_padding_contents; | |||||
if (close_open_markup && strlen(close_open_markup) != 0) { | |||||
close_open_markup_unicode = PyUnicode_FromString(close_open_markup); | |||||
if (!close_open_markup_unicode) { | |||||
Py_DECREF(close_open_kwargs); | |||||
goto fail_decref_padding_contents; | |||||
} | |||||
PyDict_SetItemString(close_open_kwargs, "wiki_markup", | |||||
close_open_markup_unicode); | |||||
Py_DECREF(close_open_markup_unicode); | |||||
} | |||||
PyDict_SetItemString(close_open_kwargs, "padding", padding); | |||||
Py_DECREF(padding); | |||||
if (Tokenizer_emit_kwargs(self, TagCloseOpen, close_open_kwargs)) | |||||
goto fail_decref_contents; | |||||
if (contents) { | |||||
if (Tokenizer_emit_all(self, contents)) | |||||
goto fail_decref_contents; | |||||
Py_DECREF(contents); | |||||
} | |||||
open_close_kwargs = PyDict_New(); | |||||
if (!open_close_kwargs) | |||||
return -1; | |||||
open_close_markup_unicode = PyUnicode_FromString(open_close_markup); | |||||
if (!open_close_markup_unicode) { | |||||
Py_DECREF(open_close_kwargs); | |||||
return -1; | |||||
} | |||||
PyDict_SetItemString(open_close_kwargs, "wiki_markup", | |||||
open_close_markup_unicode); | |||||
Py_DECREF(open_close_markup_unicode); | |||||
if (Tokenizer_emit_kwargs(self, TagOpenClose, open_close_kwargs)) | |||||
return -1; | |||||
if (Tokenizer_emit_text(self, tag)) | |||||
return -1; | |||||
if (Tokenizer_emit(self, TagCloseClose)) | |||||
return -1; | |||||
return 0; | |||||
fail_decref_all: | |||||
Py_XDECREF(style); | |||||
fail_decref_padding_contents: | |||||
Py_DECREF(padding); | |||||
fail_decref_contents: | |||||
Py_DECREF(contents); | |||||
return -1; | |||||
} | |||||
/*
    Handle style attributes for a table until an ending token.

    Reads characters starting at the current head, treating them as
    HTML-style attribute data, until *end_token* is found in a position
    where exiting is legal (i.e. not inside a quoted attribute value).
    Attribute tokens are emitted onto the current stack as a side effect.

    Returns a new reference to a unicode string holding the padding that
    preceded the end token, or NULL on error / failed route.
*/
static PyObject* Tokenizer_handle_table_style(Tokenizer* self, char end_token)
{
    TagData *data = TagData_new();
    PyObject *padding, *trash;
    Py_UNICODE this;
    int can_exit;
    if (!data)
        return NULL;
    data->context = TAG_ATTR_READY;
    while (1) {
        this = Tokenizer_READ(self, 0);
        // Exiting inside a quoted value would split the attribute; only
        // leave once the quote closed (or a space boundary was noted).
        can_exit = (!(data->context & TAG_QUOTED) || data->context & TAG_NOTE_SPACE);
        if (this == end_token && can_exit) {
            // Flush any partially collected attribute before finishing.
            if (data->context & (TAG_ATTR_NAME | TAG_ATTR_VALUE)) {
                if (Tokenizer_push_tag_buffer(self, data)) {
                    TagData_dealloc(data);
                    return NULL;
                }
            }
            if (Py_UNICODE_ISSPACE(this))
                Textbuffer_write(&(data->pad_first), this);
            // Render the leading padding; ownership passes to the caller.
            padding = Textbuffer_render(data->pad_first);
            TagData_dealloc(data);
            if (!padding)
                return NULL;
            return padding;
        }
        else if (!this || this == end_token) {
            // End of input, or end_token reached while still quoted.
            if (self->topstack->context & LC_TAG_ATTR) {
                if (data->context & TAG_QUOTED) {
                    // Unclosed attribute quote: reset, don't die
                    data->context = TAG_ATTR_VALUE;
                    trash = Tokenizer_pop(self);
                    Py_XDECREF(trash);
                    self->head = data->reset;
                    continue;
                }
                trash = Tokenizer_pop(self);
                Py_XDECREF(trash);
            }
            TagData_dealloc(data);
            return Tokenizer_fail_route(self);
        }
        else {
            // Feed one character of attribute data; a nested bad route
            // (e.g. inside a quoted value) also aborts here.
            if (Tokenizer_handle_tag_data(self, data, this) || BAD_ROUTE) {
                TagData_dealloc(data);
                return NULL;
            }
        }
        self->head++;
    }
}
/*
    Parse a wikicode table by starting with the first line.

    Expects the head to be on the first "{" of "{|".  If the style line or
    the table body fails its route, "{|" is re-emitted as plain text and 0
    is returned.  Returns 0 on success, -1 on error (exception set).
*/
static int Tokenizer_parse_table(Tokenizer* self)
{
    // Position just after "{", used to rewind when the route fails.
    Py_ssize_t reset = self->head + 1;
    PyObject *style, *padding;
    PyObject *table = NULL;
    self->head += 2;
    if(Tokenizer_push(self, LC_TABLE_OPEN))
        return -1;
    // Everything up to the newline is the table's style attributes.
    padding = Tokenizer_handle_table_style(self, '\n');
    if (BAD_ROUTE) {
        RESET_ROUTE();
        self->head = reset;
        // Not a real table; treat "{|" as literal text.
        if (Tokenizer_emit_text(self, "{|"))
            return -1;
        return 0;
    }
    if (!padding)
        return -1;
    style = Tokenizer_pop(self);
    if (!style) {
        Py_DECREF(padding);
        return -1;
    }
    self->head++;
    // Recursively parse the table body until "|}" or a failed route.
    table = Tokenizer_parse(self, LC_TABLE_OPEN, 1);
    if (BAD_ROUTE) {
        RESET_ROUTE();
        Py_DECREF(padding);
        Py_DECREF(style);
        self->head = reset;
        if (Tokenizer_emit_text(self, "{|"))
            return -1;
        return 0;
    }
    if (!table) {
        Py_DECREF(padding);
        Py_DECREF(style);
        return -1;
    }
    // NOTE(review): emit_table_tag appears to take ownership of style,
    // padding, and table (its failure labels decref them) — confirm, since
    // no decrefs happen here on either outcome.
    if (Tokenizer_emit_table_tag(self, "{|", "table", style, padding, NULL,
                                 table, "|}"))
        return -1;
    // Offset displacement done by _parse()
    self->head--;
    return 0;
}
/*
    Parse as style until end of the line, then continue.

    Handles a row marker ("|-"): the rest of the line is parsed as the
    row's style attributes, then the row contents are parsed recursively.
    Returns 0 on success or on a propagated bad route, -1 on error.
*/
static int Tokenizer_handle_table_row(Tokenizer* self)
{
    PyObject *padding, *style, *row, *trash;
    self->head += 2;
    if (!Tokenizer_CAN_RECURSE(self)) {
        // Recursion limit reached: emit "|-" literally instead of parsing.
        if (Tokenizer_emit_text(self, "|-"))
            return -1;
        self->head -= 1;
        return 0;
    }
    if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN))
        return -1;
    padding = Tokenizer_handle_table_style(self, '\n');
    if (BAD_ROUTE) {
        // No RESET_ROUTE here: the bad route deliberately propagates up to
        // the enclosing table start, which re-emits "{|" as text.
        // NOTE(review): this pop assumes the row stack pushed above is
        // still on top; fail_route inside handle_table_style also pops —
        // confirm the stack accounting for that path.
        trash = Tokenizer_pop(self);
        Py_XDECREF(trash);
        return 0;
    }
    if (!padding)
        return -1;
    style = Tokenizer_pop(self);
    if (!style) {
        Py_DECREF(padding);
        return -1;
    }
    // Don't parse the style separator
    self->head++;
    row = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_ROW_OPEN, 1);
    if (!row) {
        Py_DECREF(padding);
        Py_DECREF(style);
        return -1;
    }
    // NOTE(review): emit_table_tag appears to consume style/padding/row
    // (its failure labels decref them) — confirm ownership.
    if (Tokenizer_emit_table_tag(self, "|-", "tr", style, padding, NULL, row, ""))
        return -1;
    // Offset displacement done by _parse()
    self->head--;
    return 0;
}
/*
    Parse as normal syntax unless we hit a style marker, then parse style
    as HTML attributes and the remainder as normal syntax.

    Two-phase strategy: first parse the cell assuming it has no style
    attributes.  If the inner parse ended with LC_TABLE_CELL_STYLE set
    (a "|" style separator was found), rewind and re-parse: attributes
    first, then the real cell contents.  *markup* is the literal cell
    marker ("|", "||", "!", "!!"), *tag* the HTML tag name ("td"/"th"),
    and *line_context* the LC_TABLE_*_LINE flag to parse under.
    Returns 0 on success, -1 on error.
*/
static int
Tokenizer_handle_table_cell(Tokenizer* self, const char *markup,
                            const char *tag, uint64_t line_context)
{
    uint64_t old_context = self->topstack->context;
    uint64_t cell_context;
    Py_ssize_t reset;
    PyObject *padding, *cell, *style = NULL;
    const char *close_open_markup = NULL;
    self->head += strlen(markup);
    // Remember where the cell body starts in case we must re-parse it.
    reset = self->head;
    if (!Tokenizer_CAN_RECURSE(self)) {
        // Recursion limit reached: emit the marker literally.
        if (Tokenizer_emit_text(self, markup))
            return -1;
        self->head--;
        return 0;
    }
    // Phase 1: parse assuming no style attributes.
    cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                           LC_TABLE_CELL_STYLE | line_context, 1);
    if (!cell)
        return -1;
    cell_context = self->topstack->context;
    self->topstack->context = old_context;
    if (cell_context & LC_TABLE_CELL_STYLE) {
        // Phase 2: a style separator was found; discard the first parse,
        // rewind, and parse attributes then contents separately.
        Py_DECREF(cell);
        self->head = reset;
        if(Tokenizer_push(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                          line_context))
            return -1;
        padding = Tokenizer_handle_table_style(self, '|');
        // NOTE(review): unlike parse_table/handle_table_row, there is no
        // BAD_ROUTE check here before testing padding — confirm a bad
        // route cannot reach this point, or that NULL covers it.
        if (!padding)
            return -1;
        style = Tokenizer_pop(self);
        if (!style) {
            Py_DECREF(padding);
            return -1;
        }
        // Don't parse the style separator
        self->head++;
        cell = Tokenizer_parse(self, LC_TABLE_OPEN | LC_TABLE_CELL_OPEN |
                               line_context, 1);
        if (!cell) {
            Py_DECREF(padding);
            Py_DECREF(style);
            return -1;
        }
        cell_context = self->topstack->context;
        self->topstack->context = old_context;
    }
    else {
        // No style: the cell has empty padding.
        padding = PyUnicode_FromString("");
        if (!padding) {
            Py_DECREF(cell);
            return -1;
        }
    }
    if (style) {
        // The "|" between attributes and contents closes the open markup.
        close_open_markup = "|";
    }
    // NOTE(review): emit_table_tag appears to consume style/padding/cell
    // (its failure labels decref them) — confirm ownership.
    if (Tokenizer_emit_table_tag(self, markup, tag, style, padding,
                                 close_open_markup, cell, ""))
        return -1;
    // Keep header/cell line contexts
    self->topstack->context |= cell_context & (LC_TABLE_TH_LINE | LC_TABLE_TD_LINE);
    // Offset displacement done by parse()
    self->head--;
    return 0;
}
/* | |||||
Returns the context, stack, and whether to reset the cell for style | |||||
in a tuple. | |||||
*/ | |||||
static PyObject* | |||||
Tokenizer_handle_table_cell_end(Tokenizer* self, int reset_for_style) | |||||
{ | |||||
if (reset_for_style) | |||||
self->topstack->context |= LC_TABLE_CELL_STYLE; | |||||
else | |||||
self->topstack->context &= ~LC_TABLE_CELL_STYLE; | |||||
return Tokenizer_pop_keeping_context(self); | |||||
} | |||||
/*
    Return the stack in order to handle the table row end.

    The popped stack holds the row's parsed contents; the caller is
    responsible for emitting it.
*/
static PyObject* Tokenizer_handle_table_row_end(Tokenizer* self)
{
    return Tokenizer_pop(self);
}
/* | |||||
Return the stack in order to handle the table end. | |||||
*/ | |||||
static PyObject* Tokenizer_handle_table_end(Tokenizer* self) | |||||
{ | |||||
self->head += 2; | |||||
return Tokenizer_pop(self); | |||||
} | |||||
/* | |||||
Handle the end of the stream of wikitext. | Handle the end of the stream of wikitext. | ||||
*/ | */ | ||||
static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) | |||||
static PyObject* Tokenizer_handle_end(Tokenizer* self, uint64_t context) | |||||
{ | { | ||||
PyObject *token, *text, *trash; | PyObject *token, *text, *trash; | ||||
int single; | int single; | ||||
@@ -2423,9 +2842,16 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) | |||||
if (single) | if (single) | ||||
return Tokenizer_handle_single_tag_end(self); | return Tokenizer_handle_single_tag_end(self); | ||||
} | } | ||||
else if (context & AGG_DOUBLE) { | |||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
else { | |||||
if (context & LC_TABLE_CELL_OPEN) { | |||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
context = self->topstack->context; | |||||
} | |||||
if (context & AGG_DOUBLE) { | |||||
trash = Tokenizer_pop(self); | |||||
Py_XDECREF(trash); | |||||
} | |||||
} | } | ||||
return Tokenizer_fail_route(self); | return Tokenizer_fail_route(self); | ||||
} | } | ||||
@@ -2436,7 +2862,8 @@ static PyObject* Tokenizer_handle_end(Tokenizer* self, int context) | |||||
Make sure we are not trying to write an invalid character. Return 0 if | Make sure we are not trying to write an invalid character. Return 0 if | ||||
everything is safe, or -1 if the route must be failed. | everything is safe, or -1 if the route must be failed. | ||||
*/ | */ | ||||
static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
static int | |||||
Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data) | |||||
{ | { | ||||
if (context & LC_FAIL_NEXT) | if (context & LC_FAIL_NEXT) | ||||
return -1; | return -1; | ||||
@@ -2463,8 +2890,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
return 0; | return 0; | ||||
if (context & LC_HAS_TEXT) { | if (context & LC_HAS_TEXT) { | ||||
if (context & LC_FAIL_ON_TEXT) { | if (context & LC_FAIL_ON_TEXT) { | ||||
if (!Py_UNICODE_ISSPACE(data)) | |||||
if (!Py_UNICODE_ISSPACE(data)) { | |||||
if (data == '<' && Tokenizer_READ(self, 1) == '!') { | |||||
self->topstack->context |= LC_FAIL_NEXT; | |||||
return 0; | |||||
} | |||||
return -1; | return -1; | ||||
} | |||||
} | } | ||||
else { | else { | ||||
if (data == '\n') | if (data == '\n') | ||||
@@ -2481,8 +2913,8 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
} | } | ||||
} | } | ||||
else if (context & LC_FAIL_ON_LBRACE) { | else if (context & LC_FAIL_ON_LBRACE) { | ||||
if (data == '{' || (Tokenizer_READ(self, -1) == '{' && | |||||
Tokenizer_READ(self, -2) == '{')) { | |||||
if (data == '{' || (Tokenizer_READ_BACKWARDS(self, 1) == '{' && | |||||
Tokenizer_READ_BACKWARDS(self, 2) == '{')) { | |||||
if (context & LC_TEMPLATE) | if (context & LC_TEMPLATE) | ||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | self->topstack->context |= LC_FAIL_ON_EQUALS; | ||||
else | else | ||||
@@ -2493,10 +2925,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
} | } | ||||
else if (context & LC_FAIL_ON_RBRACE) { | else if (context & LC_FAIL_ON_RBRACE) { | ||||
if (data == '}') { | if (data == '}') { | ||||
if (context & LC_TEMPLATE) | |||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||||
else | |||||
self->topstack->context |= LC_FAIL_NEXT; | |||||
self->topstack->context |= LC_FAIL_NEXT; | |||||
return 0; | return 0; | ||||
} | } | ||||
self->topstack->context ^= LC_FAIL_ON_RBRACE; | self->topstack->context ^= LC_FAIL_ON_RBRACE; | ||||
@@ -2510,12 +2939,30 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
} | } | ||||
/* | /* | ||||
Returns whether the current head has leading whitespace. | |||||
TODO: treat comments and templates as whitespace, allow fail on non-newline spaces. | |||||
*/ | |||||
static int Tokenizer_has_leading_whitespace(Tokenizer* self) | |||||
{ | |||||
int offset = 1; | |||||
Py_UNICODE current_character; | |||||
while (1) { | |||||
current_character = Tokenizer_READ_BACKWARDS(self, offset); | |||||
if (!current_character || current_character == '\n') | |||||
return 1; | |||||
else if (!Py_UNICODE_ISSPACE(current_character)) | |||||
return 0; | |||||
offset++; | |||||
} | |||||
} | |||||
/* | |||||
Parse the wikicode string, using context for when to stop. If push is true, | Parse the wikicode string, using context for when to stop. If push is true, | ||||
we will push a new context, otherwise we won't and context will be ignored. | we will push a new context, otherwise we won't and context will be ignored. | ||||
*/ | */ | ||||
static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
static PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push) | |||||
{ | { | ||||
int this_context; | |||||
uint64_t this_context; | |||||
Py_UNICODE this, next, next_next, last; | Py_UNICODE this, next, next_next, last; | ||||
PyObject* temp; | PyObject* temp; | ||||
@@ -2644,22 +3091,99 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
if (temp != Py_None) | if (temp != Py_None) | ||||
return temp; | return temp; | ||||
} | } | ||||
else if (!last || last == '\n') { | |||||
if (this == '#' || this == '*' || this == ';' || this == ':') { | |||||
if (Tokenizer_handle_list(self)) | |||||
else if ((!last || last == '\n') && (this == '#' || this == '*' || this == ';' || this == ':')) { | |||||
if (Tokenizer_handle_list(self)) | |||||
return NULL; | |||||
} | |||||
else if ((!last || last == '\n') && (this == '-' && this == next && | |||||
this == Tokenizer_READ(self, 2) && | |||||
this == Tokenizer_READ(self, 3))) { | |||||
if (Tokenizer_handle_hr(self)) | |||||
return NULL; | |||||
} | |||||
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { | |||||
if (Tokenizer_handle_dl_term(self)) | |||||
return NULL; | |||||
// Kill potential table contexts | |||||
if (this == '\n') | |||||
self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; | |||||
} | |||||
// Start of table parsing | |||||
else if (this == '{' && next == '|' && Tokenizer_has_leading_whitespace(self)) { | |||||
if (Tokenizer_CAN_RECURSE(self)) { | |||||
if (Tokenizer_parse_table(self)) | |||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == '-' && this == next && | |||||
this == Tokenizer_READ(self, 2) && | |||||
this == Tokenizer_READ(self, 3)) { | |||||
if (Tokenizer_handle_hr(self)) | |||||
else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next)) | |||||
return NULL; | |||||
else | |||||
self->head++; | |||||
} | |||||
else if (this_context & LC_TABLE_OPEN) { | |||||
if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
else if (Tokenizer_handle_table_cell(self, "||", "td", LC_TABLE_TD_LINE)) | |||||
return NULL; | |||||
} | |||||
else if (this == '|' && next == '|' && this_context & LC_TABLE_TH_LINE) { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
else if (Tokenizer_handle_table_cell(self, "||", "th", LC_TABLE_TH_LINE)) | |||||
return NULL; | |||||
} | |||||
else if (this == '!' && next == '!' && this_context & LC_TABLE_TH_LINE) { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
else if (Tokenizer_handle_table_cell(self, "!!", "th", LC_TABLE_TH_LINE)) | |||||
return NULL; | |||||
} | |||||
else if (this == '|' && this_context & LC_TABLE_CELL_STYLE) { | |||||
return Tokenizer_handle_table_cell_end(self, 1); | |||||
} | |||||
// On newline, clear out cell line contexts | |||||
else if (this == '\n' && this_context & LC_TABLE_CELL_LINE_CONTEXTS) { | |||||
self->topstack->context &= ~LC_TABLE_CELL_LINE_CONTEXTS; | |||||
if (Tokenizer_emit_char(self, this)) | |||||
return NULL; | |||||
} | |||||
else if (Tokenizer_has_leading_whitespace(self)) { | |||||
if (this == '|' && next == '}') { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
if (this_context & LC_TABLE_ROW_OPEN) | |||||
return Tokenizer_handle_table_row_end(self); | |||||
else | |||||
return Tokenizer_handle_table_end(self); | |||||
} | |||||
else if (this == '|' && next == '-') { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
if (this_context & LC_TABLE_ROW_OPEN) | |||||
return Tokenizer_handle_table_row_end(self); | |||||
else if (Tokenizer_handle_table_row(self)) | |||||
return NULL; | |||||
} | |||||
else if (this == '|') { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
else if (Tokenizer_handle_table_cell(self, "|", "td", LC_TABLE_TD_LINE)) | |||||
return NULL; | |||||
} | |||||
else if (this == '!') { | |||||
if (this_context & LC_TABLE_CELL_OPEN) | |||||
return Tokenizer_handle_table_cell_end(self, 0); | |||||
else if (Tokenizer_handle_table_cell(self, "!", "th", LC_TABLE_TH_LINE)) | |||||
return NULL; | |||||
} | |||||
else if (Tokenizer_emit_char(self, this)) | |||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | |||||
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { | |||||
if (Tokenizer_handle_dl_term(self)) | |||||
// Raise BadRoute to table start | |||||
if (BAD_ROUTE) | |||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
@@ -2673,8 +3197,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
*/ | */ | ||||
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | ||||
{ | { | ||||
PyObject *text, *temp; | |||||
int context = 0, skip_style_tags = 0; | |||||
PyObject *text, *temp, *tokens; | |||||
uint64_t context = 0; | |||||
int skip_style_tags = 0; | |||||
if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { | if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { | ||||
Py_XDECREF(self->text); | Py_XDECREF(self->text); | ||||
@@ -2696,13 +3221,32 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
Py_XDECREF(temp); | Py_XDECREF(temp); | ||||
self->text = text; | self->text = text; | ||||
} | } | ||||
self->head = self->global = self->depth = self->cycles = 0; | self->head = self->global = self->depth = self->cycles = 0; | ||||
self->length = PyList_GET_SIZE(self->text); | self->length = PyList_GET_SIZE(self->text); | ||||
self->skip_style_tags = skip_style_tags; | self->skip_style_tags = skip_style_tags; | ||||
return Tokenizer_parse(self, context, 1); | |||||
tokens = Tokenizer_parse(self, context, 1); | |||||
if ((!tokens && !PyErr_Occurred()) || self->topstack) { | |||||
if (!ParserError) { | |||||
if (load_exceptions()) | |||||
return NULL; | |||||
} | |||||
if (BAD_ROUTE) { | |||||
RESET_ROUTE(); | |||||
PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); | |||||
} | |||||
else if (self->topstack) | |||||
PyErr_SetString(ParserError, | |||||
"C tokenizer exited with non-empty token stack"); | |||||
else | |||||
PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); | |||||
return NULL; | |||||
} | |||||
return tokens; | |||||
} | } | ||||
static int load_entitydefs(void) | |||||
static int load_entities(void) | |||||
{ | { | ||||
PyObject *tempmod, *defmap, *deflist; | PyObject *tempmod, *defmap, *deflist; | ||||
unsigned numdefs, i; | unsigned numdefs, i; | ||||
@@ -2806,7 +3350,7 @@ static int load_tokens(void) | |||||
return 0; | return 0; | ||||
} | } | ||||
static int load_definitions(void) | |||||
static int load_defs(void) | |||||
{ | { | ||||
PyObject *tempmod, | PyObject *tempmod, | ||||
*globals = PyEval_GetGlobals(), | *globals = PyEval_GetGlobals(), | ||||
@@ -2827,6 +3371,29 @@ static int load_definitions(void) | |||||
return 0; | return 0; | ||||
} | } | ||||
static int load_exceptions(void) | |||||
{ | |||||
PyObject *tempmod, *parsermod, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = IMPORT_NAME_FUNC("parser"); | |||||
char *name = "mwparserfromhell"; | |||||
if (!fromlist || !modname) | |||||
return -1; | |||||
PyList_SET_ITEM(fromlist, 0, modname); | |||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||||
Py_DECREF(fromlist); | |||||
if (!tempmod) | |||||
return -1; | |||||
parsermod = PyObject_GetAttrString(tempmod, "parser"); | |||||
Py_DECREF(tempmod); | |||||
ParserError = PyObject_GetAttrString(parsermod, "ParserError"); | |||||
Py_DECREF(parsermod); | |||||
return 0; | |||||
} | |||||
PyMODINIT_FUNC INIT_FUNC_NAME(void) | PyMODINIT_FUNC INIT_FUNC_NAME(void) | ||||
{ | { | ||||
PyObject *module; | PyObject *module; | ||||
@@ -2843,9 +3410,7 @@ PyMODINIT_FUNC INIT_FUNC_NAME(void) | |||||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | ||||
EMPTY = PyUnicode_FromString(""); | EMPTY = PyUnicode_FromString(""); | ||||
NOARGS = PyTuple_New(0); | NOARGS = PyTuple_New(0); | ||||
if (!EMPTY || !NOARGS) | |||||
INIT_ERROR; | |||||
if (load_entitydefs() || load_tokens() || load_definitions()) | |||||
if (!EMPTY || !NOARGS || load_entities() || load_tokens() || load_defs()) | |||||
INIT_ERROR; | INIT_ERROR; | ||||
#ifdef IS_PY3K | #ifdef IS_PY3K | ||||
return module; | return module; | ||||
@@ -1,6 +1,6 @@ | |||||
/* | /* | ||||
Tokenizer Header File for MWParserFromHell | Tokenizer Header File for MWParserFromHell | ||||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -29,6 +29,7 @@ SOFTWARE. | |||||
#include <math.h> | #include <math.h> | ||||
#include <structmember.h> | #include <structmember.h> | ||||
#include <bytesobject.h> | #include <bytesobject.h> | ||||
#include <stdint.h> | |||||
#if PY_MAJOR_VERSION >= 3 | #if PY_MAJOR_VERSION >= 3 | ||||
#define IS_PY3K | #define IS_PY3K | ||||
@@ -43,16 +44,17 @@ SOFTWARE. | |||||
static const char MARKERS[] = { | static const char MARKERS[] = { | ||||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | '{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | ||||
'-', '\n', '\0'}; | |||||
'-', '!', '\n', '\0'}; | |||||
#define NUM_MARKERS 18 | |||||
#define NUM_MARKERS 19 | |||||
#define TEXTBUFFER_BLOCKSIZE 1024 | #define TEXTBUFFER_BLOCKSIZE 1024 | ||||
#define MAX_DEPTH 40 | #define MAX_DEPTH 40 | ||||
#define MAX_CYCLES 100000 | #define MAX_CYCLES 100000 | ||||
#define MAX_BRACES 255 | #define MAX_BRACES 255 | ||||
#define MAX_ENTITY_SIZE 8 | #define MAX_ENTITY_SIZE 8 | ||||
static int route_state = 0, route_context = 0; | |||||
static int route_state = 0; | |||||
static uint64_t route_context = 0; | |||||
#define BAD_ROUTE route_state | #define BAD_ROUTE route_state | ||||
#define BAD_ROUTE_CONTEXT route_context | #define BAD_ROUTE_CONTEXT route_context | ||||
#define FAIL_ROUTE(context) route_state = 1; route_context = context | #define FAIL_ROUTE(context) route_state = 1; route_context = context | ||||
@@ -62,6 +64,7 @@ static char** entitydefs; | |||||
static PyObject* EMPTY; | static PyObject* EMPTY; | ||||
static PyObject* NOARGS; | static PyObject* NOARGS; | ||||
static PyObject* ParserError; | |||||
static PyObject* definitions; | static PyObject* definitions; | ||||
@@ -108,52 +111,61 @@ static PyObject* TagCloseClose; | |||||
/* Local contexts: */ | /* Local contexts: */ | ||||
#define LC_TEMPLATE 0x00000007 | |||||
#define LC_TEMPLATE_NAME 0x00000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x00000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x00000004 | |||||
#define LC_ARGUMENT 0x00000018 | |||||
#define LC_ARGUMENT_NAME 0x00000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x00000010 | |||||
#define LC_WIKILINK 0x00000060 | |||||
#define LC_WIKILINK_TITLE 0x00000020 | |||||
#define LC_WIKILINK_TEXT 0x00000040 | |||||
#define LC_EXT_LINK 0x00000180 | |||||
#define LC_EXT_LINK_URI 0x00000080 | |||||
#define LC_EXT_LINK_TITLE 0x00000100 | |||||
#define LC_HEADING 0x00007E00 | |||||
#define LC_HEADING_LEVEL_1 0x00000200 | |||||
#define LC_HEADING_LEVEL_2 0x00000400 | |||||
#define LC_HEADING_LEVEL_3 0x00000800 | |||||
#define LC_HEADING_LEVEL_4 0x00001000 | |||||
#define LC_HEADING_LEVEL_5 0x00002000 | |||||
#define LC_HEADING_LEVEL_6 0x00004000 | |||||
#define LC_TAG 0x00078000 | |||||
#define LC_TAG_OPEN 0x00008000 | |||||
#define LC_TAG_ATTR 0x00010000 | |||||
#define LC_TAG_BODY 0x00020000 | |||||
#define LC_TAG_CLOSE 0x00040000 | |||||
#define LC_STYLE 0x00780000 | |||||
#define LC_STYLE_ITALICS 0x00080000 | |||||
#define LC_STYLE_BOLD 0x00100000 | |||||
#define LC_STYLE_PASS_AGAIN 0x00200000 | |||||
#define LC_STYLE_SECOND_PASS 0x00400000 | |||||
#define LC_DLTERM 0x00800000 | |||||
#define LC_SAFETY_CHECK 0x3F000000 | |||||
#define LC_HAS_TEXT 0x01000000 | |||||
#define LC_FAIL_ON_TEXT 0x02000000 | |||||
#define LC_FAIL_NEXT 0x04000000 | |||||
#define LC_FAIL_ON_LBRACE 0x08000000 | |||||
#define LC_FAIL_ON_RBRACE 0x10000000 | |||||
#define LC_FAIL_ON_EQUALS 0x20000000 | |||||
#define LC_TEMPLATE 0x0000000000000007 | |||||
#define LC_TEMPLATE_NAME 0x0000000000000001 | |||||
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002 | |||||
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004 | |||||
#define LC_ARGUMENT 0x0000000000000018 | |||||
#define LC_ARGUMENT_NAME 0x0000000000000008 | |||||
#define LC_ARGUMENT_DEFAULT 0x0000000000000010 | |||||
#define LC_WIKILINK 0x0000000000000060 | |||||
#define LC_WIKILINK_TITLE 0x0000000000000020 | |||||
#define LC_WIKILINK_TEXT 0x0000000000000040 | |||||
#define LC_EXT_LINK 0x0000000000000180 | |||||
#define LC_EXT_LINK_URI 0x0000000000000080 | |||||
#define LC_EXT_LINK_TITLE 0x0000000000000100 | |||||
#define LC_HEADING 0x0000000000007E00 | |||||
#define LC_HEADING_LEVEL_1 0x0000000000000200 | |||||
#define LC_HEADING_LEVEL_2 0x0000000000000400 | |||||
#define LC_HEADING_LEVEL_3 0x0000000000000800 | |||||
#define LC_HEADING_LEVEL_4 0x0000000000001000 | |||||
#define LC_HEADING_LEVEL_5 0x0000000000002000 | |||||
#define LC_HEADING_LEVEL_6 0x0000000000004000 | |||||
#define LC_TAG 0x0000000000078000 | |||||
#define LC_TAG_OPEN 0x0000000000008000 | |||||
#define LC_TAG_ATTR 0x0000000000010000 | |||||
#define LC_TAG_BODY 0x0000000000020000 | |||||
#define LC_TAG_CLOSE 0x0000000000040000 | |||||
#define LC_STYLE 0x0000000000780000 | |||||
#define LC_STYLE_ITALICS 0x0000000000080000 | |||||
#define LC_STYLE_BOLD 0x0000000000100000 | |||||
#define LC_STYLE_PASS_AGAIN 0x0000000000200000 | |||||
#define LC_STYLE_SECOND_PASS 0x0000000000400000 | |||||
#define LC_DLTERM 0x0000000000800000 | |||||
#define LC_SAFETY_CHECK 0x000000003F000000 | |||||
#define LC_HAS_TEXT 0x0000000001000000 | |||||
#define LC_FAIL_ON_TEXT 0x0000000002000000 | |||||
#define LC_FAIL_NEXT 0x0000000004000000 | |||||
#define LC_FAIL_ON_LBRACE 0x0000000008000000 | |||||
#define LC_FAIL_ON_RBRACE 0x0000000010000000 | |||||
#define LC_FAIL_ON_EQUALS 0x0000000020000000 | |||||
#define LC_TABLE 0x0000000FC0000000 | |||||
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000000D00000000 | |||||
#define LC_TABLE_OPEN 0x0000000040000000 | |||||
#define LC_TABLE_CELL_OPEN 0x0000000080000000 | |||||
#define LC_TABLE_CELL_STYLE 0x0000000100000000 | |||||
#define LC_TABLE_ROW_OPEN 0x0000000200000000 | |||||
#define LC_TABLE_TD_LINE 0x0000000400000000 | |||||
#define LC_TABLE_TH_LINE 0x0000000800000000 | |||||
/* Global contexts: */ | /* Global contexts: */ | ||||
@@ -161,9 +173,9 @@ static PyObject* TagCloseClose; | |||||
/* Aggregate contexts: */ | /* Aggregate contexts: */ | ||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) | |||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | #define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | ||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | #define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | ||||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | #define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | ||||
@@ -190,7 +202,7 @@ struct Textbuffer { | |||||
struct Stack { | struct Stack { | ||||
PyObject* stack; | PyObject* stack; | ||||
int context; | |||||
uint64_t context; | |||||
struct Textbuffer* textbuffer; | struct Textbuffer* textbuffer; | ||||
struct Stack* next; | struct Stack* next; | ||||
}; | }; | ||||
@@ -201,10 +213,11 @@ typedef struct { | |||||
} HeadingData; | } HeadingData; | ||||
typedef struct { | typedef struct { | ||||
int context; | |||||
uint64_t context; | |||||
struct Textbuffer* pad_first; | struct Textbuffer* pad_first; | ||||
struct Textbuffer* pad_before_eq; | struct Textbuffer* pad_before_eq; | ||||
struct Textbuffer* pad_after_eq; | struct Textbuffer* pad_after_eq; | ||||
Py_UNICODE quoter; | |||||
Py_ssize_t reset; | Py_ssize_t reset; | ||||
} TagData; | } TagData; | ||||
@@ -265,9 +278,11 @@ static int Tokenizer_parse_entity(Tokenizer*); | |||||
static int Tokenizer_parse_comment(Tokenizer*); | static int Tokenizer_parse_comment(Tokenizer*); | ||||
static int Tokenizer_handle_dl_term(Tokenizer*); | static int Tokenizer_handle_dl_term(Tokenizer*); | ||||
static int Tokenizer_parse_tag(Tokenizer*); | static int Tokenizer_parse_tag(Tokenizer*); | ||||
static PyObject* Tokenizer_parse(Tokenizer*, int, int); | |||||
static PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); | |||||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | ||||
static int load_exceptions(void); | |||||
/* Macros for Python 2/3 compatibility: */ | /* Macros for Python 2/3 compatibility: */ | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -24,7 +24,7 @@ from __future__ import unicode_literals | |||||
from math import log | from math import log | ||||
import re | import re | ||||
from . import contexts, tokens | |||||
from . import contexts, tokens, ParserError | |||||
from ..compat import htmlentities, range | from ..compat import htmlentities, range | ||||
from ..definitions import (get_html_tag, is_parsable, is_single, | from ..definitions import (get_html_tag, is_parsable, is_single, | ||||
is_single_only, is_scheme) | is_single_only, is_scheme) | ||||
@@ -53,6 +53,7 @@ class _TagOpenData(object): | |||||
def __init__(self): | def __init__(self): | ||||
self.context = self.CX_NAME | self.context = self.CX_NAME | ||||
self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} | self.padding_buffer = {"first": "", "before_eq": "", "after_eq": ""} | ||||
self.quoter = None | |||||
self.reset = 0 | self.reset = 0 | ||||
@@ -62,11 +63,11 @@ class Tokenizer(object): | |||||
START = object() | START = object() | ||||
END = object() | END = object() | ||||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", | MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", | ||||
":", "/", "-", "\n", START, END] | |||||
":", "/", "-", "!", "\n", START, END] | |||||
MAX_DEPTH = 40 | MAX_DEPTH = 40 | ||||
MAX_CYCLES = 100000 | MAX_CYCLES = 100000 | ||||
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | ||||
tag_splitter = re.compile(r"([\s\"\\]+)") | |||||
tag_splitter = re.compile(r"([\s\"\'\\]+)") | |||||
def __init__(self): | def __init__(self): | ||||
self._text = None | self._text = None | ||||
@@ -112,7 +113,7 @@ class Tokenizer(object): | |||||
self._textbuffer = [] | self._textbuffer = [] | ||||
def _pop(self, keep_context=False): | def _pop(self, keep_context=False): | ||||
"""Pop the current stack/context/textbuffer, returing the stack. | |||||
"""Pop the current stack/context/textbuffer, returning the stack. | |||||
If *keep_context* is ``True``, then we will replace the underlying | If *keep_context* is ``True``, then we will replace the underlying | ||||
stack's context with the current stack's. | stack's context with the current stack's. | ||||
@@ -134,7 +135,7 @@ class Tokenizer(object): | |||||
"""Fail the current tokenization route. | """Fail the current tokenization route. | ||||
Discards the current stack/context/textbuffer and raises | Discards the current stack/context/textbuffer and raises | ||||
:py:exc:`~.BadRoute`. | |||||
:exc:`.BadRoute`. | |||||
""" | """ | ||||
context = self._context | context = self._context | ||||
self._pop() | self._pop() | ||||
@@ -172,14 +173,14 @@ class Tokenizer(object): | |||||
def _read(self, delta=0, wrap=False, strict=False): | def _read(self, delta=0, wrap=False, strict=False): | ||||
"""Read the value at a relative point in the wikicode. | """Read the value at a relative point in the wikicode. | ||||
The value is read from :py:attr:`self._head <_head>` plus the value of | |||||
The value is read from :attr:`self._head <_head>` plus the value of | |||||
*delta* (which can be negative). If *wrap* is ``False``, we will not | *delta* (which can be negative). If *wrap* is ``False``, we will not | ||||
allow attempts to read from the end of the string if ``self._head + | allow attempts to read from the end of the string if ``self._head + | ||||
delta`` is negative. If *strict* is ``True``, the route will be failed | delta`` is negative. If *strict* is ``True``, the route will be failed | ||||
(with :py:meth:`_fail_route`) if we try to read from past the end of | |||||
the string; otherwise, :py:attr:`self.END <END>` is returned. If we try | |||||
to read from before the start of the string, :py:attr:`self.START | |||||
<START>` is returned. | |||||
(with :meth:`_fail_route`) if we try to read from past the end of the | |||||
string; otherwise, :attr:`self.END <END>` is returned. If we try to | |||||
read from before the start of the string, :attr:`self.START <START>` is | |||||
returned. | |||||
""" | """ | ||||
index = self._head + delta | index = self._head + delta | ||||
if index < 0 and (not wrap or abs(index) > len(self._text)): | if index < 0 and (not wrap or abs(index) > len(self._text)): | ||||
@@ -255,7 +256,7 @@ class Tokenizer(object): | |||||
self._context ^= contexts.TEMPLATE_NAME | self._context ^= contexts.TEMPLATE_NAME | ||||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | elif self._context & contexts.TEMPLATE_PARAM_VALUE: | ||||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | self._context ^= contexts.TEMPLATE_PARAM_VALUE | ||||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||||
else: | |||||
self._emit_all(self._pop(keep_context=True)) | self._emit_all(self._pop(keep_context=True)) | ||||
self._context |= contexts.TEMPLATE_PARAM_KEY | self._context |= contexts.TEMPLATE_PARAM_KEY | ||||
self._emit(tokens.TemplateParamSeparator()) | self._emit(tokens.TemplateParamSeparator()) | ||||
@@ -296,8 +297,6 @@ class Tokenizer(object): | |||||
self._head = reset | self._head = reset | ||||
self._emit_text("[[") | self._emit_text("[[") | ||||
else: | else: | ||||
if self._context & contexts.FAIL_NEXT: | |||||
self._context ^= contexts.FAIL_NEXT | |||||
self._emit(tokens.WikilinkOpen()) | self._emit(tokens.WikilinkOpen()) | ||||
self._emit_all(wikilink) | self._emit_all(wikilink) | ||||
self._emit(tokens.WikilinkClose()) | self._emit(tokens.WikilinkClose()) | ||||
@@ -370,9 +369,11 @@ class Tokenizer(object): | |||||
if "(" in this and ")" in punct: | if "(" in this and ")" in punct: | ||||
punct = punct[:-1] # ')' is not longer valid punctuation | punct = punct[:-1] # ')' is not longer valid punctuation | ||||
if this.endswith(punct): | if this.endswith(punct): | ||||
for i in reversed(range(-len(this), 0)): | |||||
if i == -len(this) or this[i - 1] not in punct: | |||||
for i in range(len(this) - 1, 0, -1): | |||||
if this[i - 1] not in punct: | |||||
break | break | ||||
else: | |||||
i = 0 | |||||
stripped = this[:i] | stripped = this[:i] | ||||
if stripped and tail: | if stripped and tail: | ||||
self._emit_text(tail) | self._emit_text(tail) | ||||
@@ -607,6 +608,11 @@ class Tokenizer(object): | |||||
self._emit(tokens.CommentEnd()) | self._emit(tokens.CommentEnd()) | ||||
self._emit_all(self._pop()) | self._emit_all(self._pop()) | ||||
self._head += 2 | self._head += 2 | ||||
if self._context & contexts.FAIL_NEXT: | |||||
# _verify_safe() sets this flag while parsing a template | |||||
# name when it encounters what might be a comment -- we | |||||
# must unset it to let _verify_safe() know it was correct: | |||||
self._context ^= contexts.FAIL_NEXT | |||||
return | return | ||||
self._emit_text(this) | self._emit_text(this) | ||||
self._head += 1 | self._head += 1 | ||||
@@ -614,7 +620,7 @@ class Tokenizer(object): | |||||
def _push_tag_buffer(self, data): | def _push_tag_buffer(self, data): | ||||
"""Write a pending tag attribute from *data* to the stack.""" | """Write a pending tag attribute from *data* to the stack.""" | ||||
if data.context & data.CX_QUOTED: | if data.context & data.CX_QUOTED: | ||||
self._emit_first(tokens.TagAttrQuote()) | |||||
self._emit_first(tokens.TagAttrQuote(char=data.quoter)) | |||||
self._emit_all(self._pop()) | self._emit_all(self._pop()) | ||||
buf = data.padding_buffer | buf = data.padding_buffer | ||||
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | ||||
@@ -687,17 +693,18 @@ class Tokenizer(object): | |||||
self._push_tag_buffer(data) | self._push_tag_buffer(data) | ||||
data.context = data.CX_ATTR_NAME | data.context = data.CX_ATTR_NAME | ||||
self._push(contexts.TAG_ATTR) | self._push(contexts.TAG_ATTR) | ||||
elif data.context & data.CX_ATTR_VALUE: | |||||
else: # data.context & data.CX_ATTR_VALUE assured | |||||
escaped = self._read(-1) == "\\" and self._read(-2) != "\\" | escaped = self._read(-1) == "\\" and self._read(-2) != "\\" | ||||
if data.context & data.CX_NOTE_QUOTE: | if data.context & data.CX_NOTE_QUOTE: | ||||
data.context ^= data.CX_NOTE_QUOTE | data.context ^= data.CX_NOTE_QUOTE | ||||
if chunk == '"' and not escaped: | |||||
if chunk in "'\"" and not escaped: | |||||
data.context |= data.CX_QUOTED | data.context |= data.CX_QUOTED | ||||
self._push(self._context) | |||||
data.quoter = chunk | |||||
data.reset = self._head | data.reset = self._head | ||||
self._push(self._context) | |||||
continue | continue | ||||
elif data.context & data.CX_QUOTED: | elif data.context & data.CX_QUOTED: | ||||
if chunk == '"' and not escaped: | |||||
if chunk == data.quoter and not escaped: | |||||
data.context |= data.CX_NOTE_SPACE | data.context |= data.CX_NOTE_SPACE | ||||
continue | continue | ||||
self._handle_tag_text(chunk) | self._handle_tag_text(chunk) | ||||
@@ -728,14 +735,22 @@ class Tokenizer(object): | |||||
def _handle_blacklisted_tag(self): | def _handle_blacklisted_tag(self): | ||||
"""Handle the body of an HTML tag that is parser-blacklisted.""" | """Handle the body of an HTML tag that is parser-blacklisted.""" | ||||
strip = lambda text: text.rstrip().lower() | |||||
while True: | while True: | ||||
this, next = self._read(), self._read(1) | this, next = self._read(), self._read(1) | ||||
if this is self.END: | if this is self.END: | ||||
self._fail_route() | self._fail_route() | ||||
elif this == "<" and next == "/": | elif this == "<" and next == "/": | ||||
self._handle_tag_open_close() | |||||
self._head += 1 | |||||
return self._parse(push=False) | |||||
self._head += 3 | |||||
if self._read() != ">" or (strip(self._read(-1)) != | |||||
strip(self._stack[1].text)): | |||||
self._head -= 1 | |||||
self._emit_text("</") | |||||
continue | |||||
self._emit(tokens.TagOpenClose()) | |||||
self._emit_text(self._read(-1)) | |||||
self._emit(tokens.TagCloseClose()) | |||||
return self._pop() | |||||
elif this == "&": | elif this == "&": | ||||
self._parse_entity() | self._parse_entity() | ||||
else: | else: | ||||
@@ -751,11 +766,21 @@ class Tokenizer(object): | |||||
def _handle_single_tag_end(self): | def _handle_single_tag_end(self): | ||||
"""Handle the stream end when inside a single-supporting HTML tag.""" | """Handle the stream end when inside a single-supporting HTML tag.""" | ||||
gen = enumerate(self._stack) | |||||
index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen)) | |||||
padding = self._stack[index].padding | |||||
token = tokens.TagCloseSelfclose(padding=padding, implicit=True) | |||||
self._stack[index] = token | |||||
stack = self._stack | |||||
# We need to find the index of the TagCloseOpen token corresponding to | |||||
# the TagOpenOpen token located at index 0: | |||||
depth = 1 | |||||
for index, token in enumerate(stack[2:], 2): | |||||
if isinstance(token, tokens.TagOpenOpen): | |||||
depth += 1 | |||||
elif isinstance(token, tokens.TagCloseOpen): | |||||
depth -= 1 | |||||
if depth == 0: | |||||
break | |||||
else: # pragma: no cover (untestable/exceptional case) | |||||
raise ParserError("_handle_single_tag_end() missed a TagCloseOpen") | |||||
padding = stack[index].padding | |||||
stack[index] = tokens.TagCloseSelfclose(padding=padding, implicit=True) | |||||
return self._pop() | return self._pop() | ||||
def _really_parse_tag(self): | def _really_parse_tag(self): | ||||
@@ -935,7 +960,7 @@ class Tokenizer(object): | |||||
elif ticks == 3: | elif ticks == 3: | ||||
if self._parse_bold(): | if self._parse_bold(): | ||||
return self._pop() | return self._pop() | ||||
elif ticks == 5: | |||||
else: # ticks == 5 | |||||
self._parse_italics_and_bold() | self._parse_italics_and_bold() | ||||
self._head -= 1 | self._head -= 1 | ||||
@@ -974,12 +999,166 @@ class Tokenizer(object): | |||||
else: | else: | ||||
self._emit_text("\n") | self._emit_text("\n") | ||||
def _emit_table_tag(self, open_open_markup, tag, style, padding, | |||||
close_open_markup, contents, open_close_markup): | |||||
"""Emit a table tag.""" | |||||
self._emit(tokens.TagOpenOpen(wiki_markup=open_open_markup)) | |||||
self._emit_text(tag) | |||||
if style: | |||||
self._emit_all(style) | |||||
if close_open_markup: | |||||
self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup, | |||||
padding=padding)) | |||||
else: | |||||
self._emit(tokens.TagCloseOpen(padding=padding)) | |||||
if contents: | |||||
self._emit_all(contents) | |||||
self._emit(tokens.TagOpenClose(wiki_markup=open_close_markup)) | |||||
self._emit_text(tag) | |||||
self._emit(tokens.TagCloseClose()) | |||||
def _handle_table_style(self, end_token): | |||||
"""Handle style attributes for a table until ``end_token``.""" | |||||
data = _TagOpenData() | |||||
data.context = _TagOpenData.CX_ATTR_READY | |||||
while True: | |||||
this = self._read() | |||||
can_exit = (not data.context & data.CX_QUOTED or | |||||
data.context & data.CX_NOTE_SPACE) | |||||
if this == end_token and can_exit: | |||||
if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): | |||||
self._push_tag_buffer(data) | |||||
if this.isspace(): | |||||
data.padding_buffer["first"] += this | |||||
return data.padding_buffer["first"] | |||||
elif this is self.END or this == end_token: | |||||
if self._context & contexts.TAG_ATTR: | |||||
if data.context & data.CX_QUOTED: | |||||
# Unclosed attribute quote: reset, don't die | |||||
data.context = data.CX_ATTR_VALUE | |||||
self._pop() | |||||
self._head = data.reset | |||||
continue | |||||
self._pop() | |||||
self._fail_route() | |||||
else: | |||||
self._handle_tag_data(data, this) | |||||
self._head += 1 | |||||
def _parse_table(self): | |||||
"""Parse a wikicode table by starting with the first line.""" | |||||
reset = self._head + 1 | |||||
self._head += 2 | |||||
self._push(contexts.TABLE_OPEN) | |||||
try: | |||||
padding = self._handle_table_style("\n") | |||||
except BadRoute: | |||||
self._head = reset | |||||
self._emit_text("{|") | |||||
return | |||||
style = self._pop() | |||||
self._head += 1 | |||||
try: | |||||
table = self._parse(contexts.TABLE_OPEN) | |||||
except BadRoute: | |||||
self._head = reset | |||||
self._emit_text("{|") | |||||
return | |||||
self._emit_table_tag("{|", "table", style, padding, None, table, "|}") | |||||
# Offset displacement done by _parse(): | |||||
self._head -= 1 | |||||
def _handle_table_row(self): | |||||
"""Parse as style until end of the line, then continue.""" | |||||
self._head += 2 | |||||
if not self._can_recurse(): | |||||
self._emit_text("|-") | |||||
self._head -= 1 | |||||
return | |||||
self._push(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||||
try: | |||||
padding = self._handle_table_style("\n") | |||||
except BadRoute: | |||||
self._pop() | |||||
raise | |||||
style = self._pop() | |||||
# Don't parse the style separator: | |||||
self._head += 1 | |||||
row = self._parse(contexts.TABLE_OPEN | contexts.TABLE_ROW_OPEN) | |||||
self._emit_table_tag("|-", "tr", style, padding, None, row, "") | |||||
# Offset displacement done by parse(): | |||||
self._head -= 1 | |||||
def _handle_table_cell(self, markup, tag, line_context): | |||||
"""Parse as normal syntax unless we hit a style marker, then parse | |||||
style as HTML attributes and the remainder as normal syntax.""" | |||||
old_context = self._context | |||||
padding, style = "", None | |||||
self._head += len(markup) | |||||
reset = self._head | |||||
if not self._can_recurse(): | |||||
self._emit_text(markup) | |||||
self._head -= 1 | |||||
return | |||||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | | |||||
line_context | contexts.TABLE_CELL_STYLE) | |||||
cell_context = self._context | |||||
self._context = old_context | |||||
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE | |||||
if reset_for_style: | |||||
self._head = reset | |||||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | | |||||
line_context) | |||||
padding = self._handle_table_style("|") | |||||
style = self._pop() | |||||
# Don't parse the style separator: | |||||
self._head += 1 | |||||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | | |||||
line_context) | |||||
cell_context = self._context | |||||
self._context = old_context | |||||
close_open_markup = "|" if reset_for_style else None | |||||
self._emit_table_tag(markup, tag, style, padding, close_open_markup, | |||||
cell, "") | |||||
# Keep header/cell line contexts: | |||||
self._context |= cell_context & (contexts.TABLE_TH_LINE | | |||||
contexts.TABLE_TD_LINE) | |||||
# Offset displacement done by parse(): | |||||
self._head -= 1 | |||||
def _handle_table_cell_end(self, reset_for_style=False): | |||||
"""Returns the current context, with the TABLE_CELL_STYLE flag set if | |||||
it is necessary to reset and parse style attributes.""" | |||||
if reset_for_style: | |||||
self._context |= contexts.TABLE_CELL_STYLE | |||||
else: | |||||
self._context &= ~contexts.TABLE_CELL_STYLE | |||||
return self._pop(keep_context=True) | |||||
def _handle_table_row_end(self): | |||||
"""Return the stack in order to handle the table row end.""" | |||||
return self._pop() | |||||
def _handle_table_end(self): | |||||
"""Return the stack in order to handle the table end.""" | |||||
self._head += 2 | |||||
return self._pop() | |||||
def _handle_end(self): | def _handle_end(self): | ||||
"""Handle the end of the stream of wikitext.""" | """Handle the end of the stream of wikitext.""" | ||||
if self._context & contexts.FAIL: | if self._context & contexts.FAIL: | ||||
if self._context & contexts.TAG_BODY: | if self._context & contexts.TAG_BODY: | ||||
if is_single(self._stack[1].text): | if is_single(self._stack[1].text): | ||||
return self._handle_single_tag_end() | return self._handle_single_tag_end() | ||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
self._pop() | |||||
if self._context & contexts.DOUBLE: | if self._context & contexts.DOUBLE: | ||||
self._pop() | self._pop() | ||||
self._fail_route() | self._fail_route() | ||||
@@ -1009,6 +1188,9 @@ class Tokenizer(object): | |||||
if context & contexts.HAS_TEXT: | if context & contexts.HAS_TEXT: | ||||
if context & contexts.FAIL_ON_TEXT: | if context & contexts.FAIL_ON_TEXT: | ||||
if this is self.END or not this.isspace(): | if this is self.END or not this.isspace(): | ||||
if this == "<" and self._read(1) == "!": | |||||
self._context |= contexts.FAIL_NEXT | |||||
return True | |||||
return False | return False | ||||
else: | else: | ||||
if this == "\n": | if this == "\n": | ||||
@@ -1032,10 +1214,7 @@ class Tokenizer(object): | |||||
self._context ^= contexts.FAIL_ON_LBRACE | self._context ^= contexts.FAIL_ON_LBRACE | ||||
elif context & contexts.FAIL_ON_RBRACE: | elif context & contexts.FAIL_ON_RBRACE: | ||||
if this == "}": | if this == "}": | ||||
if context & contexts.TEMPLATE: | |||||
self._context |= contexts.FAIL_ON_EQUALS | |||||
else: | |||||
self._context |= contexts.FAIL_NEXT | |||||
self._context |= contexts.FAIL_NEXT | |||||
return True | return True | ||||
self._context ^= contexts.FAIL_ON_RBRACE | self._context ^= contexts.FAIL_ON_RBRACE | ||||
elif this == "{": | elif this == "{": | ||||
@@ -1127,15 +1306,68 @@ class Tokenizer(object): | |||||
result = self._parse_style() | result = self._parse_style() | ||||
if result is not None: | if result is not None: | ||||
return result | return result | ||||
elif self._read(-1) in ("\n", self.START): | |||||
if this in ("#", "*", ";", ":"): | |||||
elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"): | |||||
self._handle_list() | self._handle_list() | ||||
elif this == next == self._read(2) == self._read(3) == "-": | |||||
elif self._read(-1) in ("\n", self.START) and this == next == self._read(2) == self._read(3) == "-": | |||||
self._handle_hr() | self._handle_hr() | ||||
else: | |||||
self._emit_text(this) | |||||
elif this in ("\n", ":") and self._context & contexts.DL_TERM: | elif this in ("\n", ":") and self._context & contexts.DL_TERM: | ||||
self._handle_dl_term() | self._handle_dl_term() | ||||
if this == "\n": | |||||
# Kill potential table contexts | |||||
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS | |||||
# Start of table parsing | |||||
elif this == "{" and next == "|" and (self._read(-1) in ("\n", self.START) or | |||||
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): | |||||
if self._can_recurse(): | |||||
self._parse_table() | |||||
else: | |||||
self._emit_text("{|") | |||||
elif self._context & contexts.TABLE_OPEN: | |||||
if this == next == "|" and self._context & contexts.TABLE_TD_LINE: | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE) | |||||
elif this == next == "|" and self._context & contexts.TABLE_TH_LINE: | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE) | |||||
elif this == next == "!" and self._context & contexts.TABLE_TH_LINE: | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE) | |||||
elif this == "|" and self._context & contexts.TABLE_CELL_STYLE: | |||||
return self._handle_table_cell_end(reset_for_style=True) | |||||
# on newline, clear out cell line contexts | |||||
elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS: | |||||
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS | |||||
self._emit_text(this) | |||||
elif (self._read(-1) in ("\n", self.START) or | |||||
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): | |||||
if this == "|" and next == "}": | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
if self._context & contexts.TABLE_ROW_OPEN: | |||||
return self._handle_table_row_end() | |||||
return self._handle_table_end() | |||||
elif this == "|" and next == "-": | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
if self._context & contexts.TABLE_ROW_OPEN: | |||||
return self._handle_table_row_end() | |||||
self._handle_table_row() | |||||
elif this == "|": | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
self._handle_table_cell("|", "td", contexts.TABLE_TD_LINE) | |||||
elif this == "!": | |||||
if self._context & contexts.TABLE_CELL_OPEN: | |||||
return self._handle_table_cell_end() | |||||
self._handle_table_cell("!", "th", contexts.TABLE_TH_LINE) | |||||
else: | |||||
self._emit_text(this) | |||||
else: | |||||
self._emit_text(this) | |||||
else: | else: | ||||
self._emit_text(this) | self._emit_text(this) | ||||
self._head += 1 | self._head += 1 | ||||
@@ -1146,4 +1378,11 @@ class Tokenizer(object): | |||||
split = self.regex.split(text) | split = self.regex.split(text) | ||||
self._text = [segment for segment in split if segment] | self._text = [segment for segment in split if segment] | ||||
self._head = self._global = self._depth = self._cycles = 0 | self._head = self._global = self._depth = self._cycles = 0 | ||||
return self._parse(context) | |||||
try: | |||||
tokens = self._parse(context) | |||||
except BadRoute: # pragma: no cover (untestable/exceptional case) | |||||
raise ParserError("Python tokenizer exited with BadRoute") | |||||
if self._stacks: # pragma: no cover (untestable/exceptional case) | |||||
err = "Python tokenizer exited with non-empty token stack" | |||||
raise ParserError(err) | |||||
return tokens |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -24,8 +24,8 @@ | |||||
This module contains the token definitions that are used as an intermediate | This module contains the token definitions that are used as an intermediate | ||||
parsing data type - they are stored in a flat list, with each token being | parsing data type - they are stored in a flat list, with each token being | ||||
identified by its type and optional attributes. The token list is generated in | identified by its type and optional attributes. The token list is generated in | ||||
a syntactically valid form by the :py:class:`~.Tokenizer`, and then converted | |||||
into the :py:class`~.Wikicode` tree by the :py:class:`~.Builder`. | |||||
a syntactically valid form by the :class:`.Tokenizer`, and then converted into | |||||
the :class`.Wikicode` tree by the :class:`.Builder`. | |||||
""" | """ | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
@@ -34,7 +34,7 @@ from ..compat import py3k, str | |||||
__all__ = ["Token"] | __all__ = ["Token"] | ||||
class Token (dict): | |||||
class Token(dict): | |||||
"""A token stores the semantic meaning of a unit of wikicode.""" | """A token stores the semantic meaning of a unit of wikicode.""" | ||||
def __repr__(self): | def __repr__(self): | ||||
@@ -100,7 +100,7 @@ CommentEnd = make("CommentEnd") # --> | |||||
TagOpenOpen = make("TagOpenOpen") # < | TagOpenOpen = make("TagOpenOpen") # < | ||||
TagAttrStart = make("TagAttrStart") | TagAttrStart = make("TagAttrStart") | ||||
TagAttrEquals = make("TagAttrEquals") # = | TagAttrEquals = make("TagAttrEquals") # = | ||||
TagAttrQuote = make("TagAttrQuote") # " | |||||
TagAttrQuote = make("TagAttrQuote") # ", ' | |||||
TagCloseOpen = make("TagCloseOpen") # > | TagCloseOpen = make("TagCloseOpen") # > | ||||
TagCloseSelfclose = make("TagCloseSelfclose") # /> | TagCloseSelfclose = make("TagCloseSelfclose") # /> | ||||
TagOpenClose = make("TagOpenClose") # </ | TagOpenClose = make("TagOpenClose") # </ | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,8 +21,8 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | """ | ||||
This module contains the :py:class:`~.SmartList` type, as well as its | |||||
:py:class:`~._ListProxy` child, which together implement a list whose sublists | |||||
This module contains the :class:`.SmartList` type, as well as its | |||||
:class:`._ListProxy` child, which together implement a list whose sublists | |||||
reflect changes made to the main list, and vice-versa. | reflect changes made to the main list, and vice-versa. | ||||
""" | """ | ||||
@@ -35,12 +35,13 @@ __all__ = ["SmartList"] | |||||
def inheritdoc(method): | def inheritdoc(method): | ||||
"""Set __doc__ of *method* to __doc__ of *method* in its parent class. | """Set __doc__ of *method* to __doc__ of *method* in its parent class. | ||||
Since this is used on :py:class:`~.SmartList`, the "parent class" used is | |||||
Since this is used on :class:`.SmartList`, the "parent class" used is | |||||
``list``. This function can be used as a decorator. | ``list``. This function can be used as a decorator. | ||||
""" | """ | ||||
method.__doc__ = getattr(list, method.__name__).__doc__ | method.__doc__ = getattr(list, method.__name__).__doc__ | ||||
return method | return method | ||||
class _SliceNormalizerMixIn(object): | class _SliceNormalizerMixIn(object): | ||||
"""MixIn that provides a private method to normalize slices.""" | """MixIn that provides a private method to normalize slices.""" | ||||
@@ -64,9 +65,9 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
list (such as the addition, removal, or replacement of elements) will be | list (such as the addition, removal, or replacement of elements) will be | ||||
reflected in the sublist, or vice-versa, to the greatest degree possible. | reflected in the sublist, or vice-versa, to the greatest degree possible. | ||||
This is implemented by having sublists - instances of the | This is implemented by having sublists - instances of the | ||||
:py:class:`~._ListProxy` type - dynamically determine their elements by | |||||
storing their slice info and retrieving that slice from the parent. Methods | |||||
that change the size of the list also change the slice info. For example:: | |||||
:class:`._ListProxy` type - dynamically determine their elements by storing | |||||
their slice info and retrieving that slice from the parent. Methods that | |||||
change the size of the list also change the slice info. For example:: | |||||
>>> parent = SmartList([0, 1, 2, 3]) | >>> parent = SmartList([0, 1, 2, 3]) | ||||
>>> parent | >>> parent | ||||
@@ -83,7 +84,9 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
The parent needs to keep a list of its children in order to update them, | The parent needs to keep a list of its children in order to update them, | ||||
which prevents them from being garbage-collected. If you are keeping the | which prevents them from being garbage-collected. If you are keeping the | ||||
parent around for a while but creating many children, it is advisable to | parent around for a while but creating many children, it is advisable to | ||||
call :py:meth:`~._ListProxy.destroy` when you're finished with them. | |||||
call :meth:`._ListProxy.detach` when you're finished with them. Certain | |||||
parent methods, like :meth:`reverse` and :meth:`sort`, will do this | |||||
automatically. | |||||
""" | """ | ||||
def __init__(self, iterable=None): | def __init__(self, iterable=None): | ||||
@@ -151,10 +154,10 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
self.extend(other) | self.extend(other) | ||||
return self | return self | ||||
def _release_children(self): | |||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
def _detach_children(self): | |||||
children = [val[0] for val in self._children.values()] | |||||
for child in children: | |||||
child.detach() | |||||
@inheritdoc | @inheritdoc | ||||
def append(self, item): | def append(self, item): | ||||
@@ -184,13 +187,13 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
@inheritdoc | @inheritdoc | ||||
def reverse(self): | def reverse(self): | ||||
self._release_children() | |||||
self._detach_children() | |||||
super(SmartList, self).reverse() | super(SmartList, self).reverse() | ||||
if py3k: | if py3k: | ||||
@inheritdoc | @inheritdoc | ||||
def sort(self, key=None, reverse=None): | def sort(self, key=None, reverse=None): | ||||
self._release_children() | |||||
self._detach_children() | |||||
kwargs = {} | kwargs = {} | ||||
if key is not None: | if key is not None: | ||||
kwargs["key"] = key | kwargs["key"] = key | ||||
@@ -200,7 +203,7 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
else: | else: | ||||
@inheritdoc | @inheritdoc | ||||
def sort(self, cmp=None, key=None, reverse=None): | def sort(self, cmp=None, key=None, reverse=None): | ||||
self._release_children() | |||||
self._detach_children() | |||||
kwargs = {} | kwargs = {} | ||||
if cmp is not None: | if cmp is not None: | ||||
kwargs["cmp"] = cmp | kwargs["cmp"] = cmp | ||||
@@ -214,15 +217,16 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
class _ListProxy(_SliceNormalizerMixIn, list): | class _ListProxy(_SliceNormalizerMixIn, list): | ||||
"""Implement the ``list`` interface by getting elements from a parent. | """Implement the ``list`` interface by getting elements from a parent. | ||||
This is created by a :py:class:`~.SmartList` object when slicing. It does | |||||
not actually store the list at any time; instead, whenever the list is | |||||
needed, it builds it dynamically using the :py:meth:`_render` method. | |||||
This is created by a :class:`.SmartList` object when slicing. It does not | |||||
actually store the list at any time; instead, whenever the list is needed, | |||||
it builds it dynamically using the :meth:`_render` method. | |||||
""" | """ | ||||
def __init__(self, parent, sliceinfo): | def __init__(self, parent, sliceinfo): | ||||
super(_ListProxy, self).__init__() | super(_ListProxy, self).__init__() | ||||
self._parent = parent | self._parent = parent | ||||
self._sliceinfo = sliceinfo | self._sliceinfo = sliceinfo | ||||
self._detached = False | |||||
def __repr__(self): | def __repr__(self): | ||||
return repr(self._render()) | return repr(self._render()) | ||||
@@ -452,9 +456,17 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
item.sort(**kwargs) | item.sort(**kwargs) | ||||
self._parent[self._start:self._stop:self._step] = item | self._parent[self._start:self._stop:self._step] = item | ||||
def destroy(self): | |||||
"""Make the parent forget this child. The child will no longer work.""" | |||||
self._parent._children.pop(id(self)) | |||||
def detach(self): | |||||
"""Detach the child so it operates like a normal list. | |||||
This allows children to be properly garbage-collected if their parent | |||||
is being kept around for a long time. This method has no effect if the | |||||
child is already detached. | |||||
""" | |||||
if not self._detached: | |||||
self._parent._children.pop(id(self)) | |||||
self._parent = list(self._parent) | |||||
self._detached = True | |||||
del inheritdoc | del inheritdoc |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,7 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | """ | ||||
This module contains the :py:class:`~.StringMixIn` type, which implements the | |||||
This module contains the :class:`.StringMixIn` type, which implements the | |||||
interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. | interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. | ||||
""" | """ | ||||
@@ -35,7 +35,7 @@ __all__ = ["StringMixIn"] | |||||
def inheritdoc(method): | def inheritdoc(method): | ||||
"""Set __doc__ of *method* to __doc__ of *method* in its parent class. | """Set __doc__ of *method* to __doc__ of *method* in its parent class. | ||||
Since this is used on :py:class:`~.StringMixIn`, the "parent class" used is | |||||
Since this is used on :class:`.StringMixIn`, the "parent class" used is | |||||
``str``. This function can be used as a decorator. | ``str``. This function can be used as a decorator. | ||||
""" | """ | ||||
method.__doc__ = getattr(str, method.__name__).__doc__ | method.__doc__ = getattr(str, method.__name__).__doc__ | ||||
@@ -44,11 +44,10 @@ def inheritdoc(method): | |||||
class StringMixIn(object): | class StringMixIn(object): | ||||
"""Implement the interface for ``unicode``/``str`` in a dynamic manner. | """Implement the interface for ``unicode``/``str`` in a dynamic manner. | ||||
To use this class, inherit from it and override the :py:meth:`__unicode__` | |||||
To use this class, inherit from it and override the :meth:`__unicode__` | |||||
method (same on py3k) to return the string representation of the object. | method (same on py3k) to return the string representation of the object. | ||||
The various string methods will operate on the value of | |||||
:py:meth:`__unicode__` instead of the immutable ``self`` like the regular | |||||
``str`` type. | |||||
The various string methods will operate on the value of :meth:`__unicode__` | |||||
instead of the immutable ``self`` like the regular ``str`` type. | |||||
""" | """ | ||||
if py3k: | if py3k: | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -33,23 +33,19 @@ from .smart_list import SmartList | |||||
__all__ = ["parse_anything"] | __all__ = ["parse_anything"] | ||||
def parse_anything(value, context=0): | |||||
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. | |||||
def parse_anything(value, context=0, skip_style_tags=False): | |||||
"""Return a :class:`.Wikicode` for *value*, allowing multiple types. | |||||
This differs from :py:meth:`.Parser.parse` in that we accept more than just | |||||
a string to be parsed. Unicode objects (strings in py3k), strings (bytes in | |||||
py3k), integers (converted to strings), ``None``, existing | |||||
:py:class:`~.Node` or :py:class:`~.Wikicode` objects, as well as an | |||||
iterable of these types, are supported. This is used to parse input | |||||
on-the-fly by various methods of :py:class:`~.Wikicode` and others like | |||||
:py:class:`~.Template`, such as :py:meth:`wikicode.insert() | |||||
<.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. | |||||
This differs from :meth:`.Parser.parse` in that we accept more than just a | |||||
string to be parsed. Unicode objects (strings in py3k), strings (bytes in | |||||
py3k), integers (converted to strings), ``None``, existing :class:`.Node` | |||||
or :class:`.Wikicode` objects, as well as an iterable of these types, are | |||||
supported. This is used to parse input on-the-fly by various methods of | |||||
:class:`.Wikicode` and others like :class:`.Template`, such as | |||||
:meth:`wikicode.insert() <.Wikicode.insert>` or setting | |||||
:meth:`template.name <.Template.name>`. | |||||
If given, *context* will be passed as a starting context to the parser. | |||||
This is helpful when this function is used inside node attribute setters. | |||||
For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url` | |||||
setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to | |||||
prevent the URL itself from becoming an :py:class:`~.ExternalLink`. | |||||
Additional arguments are passed directly to :meth:`.Parser.parse`. | |||||
""" | """ | ||||
from .parser import Parser | from .parser import Parser | ||||
from .wikicode import Wikicode | from .wikicode import Wikicode | ||||
@@ -59,18 +55,18 @@ def parse_anything(value, context=0): | |||||
elif isinstance(value, Node): | elif isinstance(value, Node): | ||||
return Wikicode(SmartList([value])) | return Wikicode(SmartList([value])) | ||||
elif isinstance(value, str): | elif isinstance(value, str): | ||||
return Parser().parse(value, context) | |||||
return Parser().parse(value, context, skip_style_tags) | |||||
elif isinstance(value, bytes): | elif isinstance(value, bytes): | ||||
return Parser().parse(value.decode("utf8"), context) | |||||
return Parser().parse(value.decode("utf8"), context, skip_style_tags) | |||||
elif isinstance(value, int): | elif isinstance(value, int): | ||||
return Parser().parse(str(value), context) | |||||
return Parser().parse(str(value), context, skip_style_tags) | |||||
elif value is None: | elif value is None: | ||||
return Wikicode(SmartList()) | return Wikicode(SmartList()) | ||||
try: | try: | ||||
nodelist = SmartList() | nodelist = SmartList() | ||||
for item in value: | for item in value: | ||||
nodelist += parse_anything(item, context).nodes | |||||
nodelist += parse_anything(item, context, skip_style_tags).nodes | |||||
return Wikicode(nodelist) | |||||
except TypeError: | except TypeError: | ||||
error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" | error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" | ||||
raise ValueError(error.format(type(value).__name__, value)) | raise ValueError(error.format(type(value).__name__, value)) | ||||
return Wikicode(nodelist) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -39,11 +39,12 @@ class Wikicode(StringMixIn): | |||||
Additionally, it contains methods that can be used to extract data from or | Additionally, it contains methods that can be used to extract data from or | ||||
modify the nodes, implemented in an interface similar to a list. For | modify the nodes, implemented in an interface similar to a list. For | ||||
example, :py:meth:`index` can get the index of a node in the list, and | |||||
:py:meth:`insert` can add a new node at that index. The :py:meth:`filter() | |||||
example, :meth:`index` can get the index of a node in the list, and | |||||
:meth:`insert` can add a new node at that index. The :meth:`filter() | |||||
<ifilter>` series of functions is very useful for extracting and iterating | <ifilter>` series of functions is very useful for extracting and iterating | ||||
over, for example, all of the templates in the object. | over, for example, all of the templates in the object. | ||||
""" | """ | ||||
RECURSE_OTHERS = 2 | |||||
def __init__(self, nodes): | def __init__(self, nodes): | ||||
super(Wikicode, self).__init__() | super(Wikicode, self).__init__() | ||||
@@ -53,12 +54,15 @@ class Wikicode(StringMixIn): | |||||
return "".join([str(node) for node in self.nodes]) | return "".join([str(node) for node in self.nodes]) | ||||
@staticmethod | @staticmethod | ||||
def _get_children(node, contexts=False, parent=None): | |||||
"""Iterate over all child :py:class:`.Node`\ s of a given *node*.""" | |||||
def _get_children(node, contexts=False, restrict=None, parent=None): | |||||
"""Iterate over all child :class:`.Node`\ s of a given *node*.""" | |||||
yield (parent, node) if contexts else node | yield (parent, node) if contexts else node | ||||
if restrict and isinstance(node, restrict): | |||||
return | |||||
for code in node.__children__(): | for code in node.__children__(): | ||||
for child in code.nodes: | for child in code.nodes: | ||||
for result in Wikicode._get_children(child, contexts, code): | |||||
sub = Wikicode._get_children(child, contexts, restrict, code) | |||||
for result in sub: | |||||
yield result | yield result | ||||
@staticmethod | @staticmethod | ||||
@@ -70,7 +74,7 @@ class Wikicode(StringMixIn): | |||||
@staticmethod | @staticmethod | ||||
def _build_matcher(matches, flags): | def _build_matcher(matches, flags): | ||||
"""Helper for :py:meth:`_indexed_ifilter` and others. | |||||
"""Helper for :meth:`_indexed_ifilter` and others. | |||||
If *matches* is a function, return it. If it's a regex, return a | If *matches* is a function, return it. If it's a regex, return a | ||||
wrapper around it that can be called with a node to do a search. If | wrapper around it that can be called with a node to do a search. If | ||||
@@ -79,22 +83,23 @@ class Wikicode(StringMixIn): | |||||
if matches: | if matches: | ||||
if callable(matches): | if callable(matches): | ||||
return matches | return matches | ||||
return lambda obj: re.search(matches, str(obj), flags) # r | |||||
return lambda obj: re.search(matches, str(obj), flags) | |||||
return lambda obj: True | return lambda obj: True | ||||
def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, | def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, | ||||
forcetype=None): | forcetype=None): | ||||
"""Iterate over nodes and their corresponding indices in the node list. | """Iterate over nodes and their corresponding indices in the node list. | ||||
The arguments are interpreted as for :py:meth:`ifilter`. For each tuple | |||||
The arguments are interpreted as for :meth:`ifilter`. For each tuple | |||||
``(i, node)`` yielded by this method, ``self.index(node) == i``. Note | ``(i, node)`` yielded by this method, ``self.index(node) == i``. Note | ||||
that if *recursive* is ``True``, ``self.nodes[i]`` might not be the | that if *recursive* is ``True``, ``self.nodes[i]`` might not be the | ||||
node itself, but will still contain it. | node itself, but will still contain it. | ||||
""" | """ | ||||
match = self._build_matcher(matches, flags) | match = self._build_matcher(matches, flags) | ||||
if recursive: | if recursive: | ||||
restrict = forcetype if recursive == self.RECURSE_OTHERS else None | |||||
def getter(i, node): | def getter(i, node): | ||||
for ch in self._get_children(node): | |||||
for ch in self._get_children(node, restrict=restrict): | |||||
yield (i, ch) | yield (i, ch) | ||||
inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) | inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) | ||||
else: | else: | ||||
@@ -106,17 +111,17 @@ class Wikicode(StringMixIn): | |||||
def _do_strong_search(self, obj, recursive=True): | def _do_strong_search(self, obj, recursive=True): | ||||
"""Search for the specific element *obj* within the node list. | """Search for the specific element *obj* within the node list. | ||||
*obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode` | |||||
object. If found, we return a tuple (*context*, *index*) where | |||||
*context* is the :py:class:`.Wikicode` that contains *obj* and *index* | |||||
is its index there, as a :py:class:`slice`. Note that if *recursive* is | |||||
``False``, *context* will always be ``self`` (since we only look for | |||||
*obj* among immediate descendants), but if *recursive* is ``True``, | |||||
then it could be any :py:class:`.Wikicode` contained by a node within | |||||
``self``. If *obj* is not found, :py:exc:`ValueError` is raised. | |||||
*obj* can be either a :class:`.Node` or a :class:`.Wikicode` object. If | |||||
found, we return a tuple (*context*, *index*) where *context* is the | |||||
:class:`.Wikicode` that contains *obj* and *index* is its index there, | |||||
as a :class:`slice`. Note that if *recursive* is ``False``, *context* | |||||
will always be ``self`` (since we only look for *obj* among immediate | |||||
descendants), but if *recursive* is ``True``, then it could be any | |||||
:class:`.Wikicode` contained by a node within ``self``. If *obj* is not | |||||
found, :exc:`ValueError` is raised. | |||||
""" | """ | ||||
mkslice = lambda i: slice(i, i + 1) | |||||
if isinstance(obj, Node): | if isinstance(obj, Node): | ||||
mkslice = lambda i: slice(i, i + 1) | |||||
if not recursive: | if not recursive: | ||||
return self, mkslice(self.index(obj)) | return self, mkslice(self.index(obj)) | ||||
for i, node in enumerate(self.nodes): | for i, node in enumerate(self.nodes): | ||||
@@ -125,26 +130,25 @@ class Wikicode(StringMixIn): | |||||
if not context: | if not context: | ||||
context = self | context = self | ||||
return context, mkslice(context.index(child)) | return context, mkslice(context.index(child)) | ||||
else: | |||||
context, ind = self._do_strong_search(obj.get(0), recursive) | |||||
for i in range(1, len(obj.nodes)): | |||||
if obj.get(i) is not context.get(ind.start + i): | |||||
break | |||||
else: | |||||
return context, slice(ind.start, ind.start + len(obj.nodes)) | |||||
raise ValueError(obj) | |||||
raise ValueError(obj) | |||||
context, ind = self._do_strong_search(obj.get(0), recursive) | |||||
for i in range(1, len(obj.nodes)): | |||||
if obj.get(i) is not context.get(ind.start + i): | |||||
raise ValueError(obj) | |||||
return context, slice(ind.start, ind.start + len(obj.nodes)) | |||||
def _do_weak_search(self, obj, recursive): | def _do_weak_search(self, obj, recursive): | ||||
"""Search for an element that looks like *obj* within the node list. | """Search for an element that looks like *obj* within the node list. | ||||
This follows the same rules as :py:meth:`_do_strong_search` with some | |||||
This follows the same rules as :meth:`_do_strong_search` with some | |||||
differences. *obj* is treated as a string that might represent any | differences. *obj* is treated as a string that might represent any | ||||
:py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two | |||||
present in the node list. Thus, matching is weak (using string | |||||
comparisons) rather than strong (using ``is``). Because multiple nodes | |||||
can match *obj*, the result is a list of tuples instead of just one | |||||
(however, :py:exc:`ValueError` is still raised if nothing is found). | |||||
Individual matches will never overlap. | |||||
:class:`.Node`, :class:`.Wikicode`, or combination of the two present | |||||
in the node list. Thus, matching is weak (using string comparisons) | |||||
rather than strong (using ``is``). Because multiple nodes can match | |||||
*obj*, the result is a list of tuples instead of just one (however, | |||||
:exc:`ValueError` is still raised if nothing is found). Individual | |||||
matches will never overlap. | |||||
The tuples contain a new first element, *exact*, which is ``True`` if | The tuples contain a new first element, *exact*, which is ``True`` if | ||||
we were able to match *obj* exactly to one or more adjacent nodes, or | we were able to match *obj* exactly to one or more adjacent nodes, or | ||||
@@ -208,24 +212,24 @@ class Wikicode(StringMixIn): | |||||
def _build_filter_methods(cls, **meths): | def _build_filter_methods(cls, **meths): | ||||
"""Given Node types, build the corresponding i?filter shortcuts. | """Given Node types, build the corresponding i?filter shortcuts. | ||||
The should be given as keys storing the method's base name paired | |||||
with values storing the corresponding :py:class:`~.Node` type. For | |||||
example, the dict may contain the pair ``("templates", Template)``, | |||||
which will produce the methods :py:meth:`ifilter_templates` and | |||||
:py:meth:`filter_templates`, which are shortcuts for | |||||
:py:meth:`ifilter(forcetype=Template) <ifilter>` and | |||||
:py:meth:`filter(forcetype=Template) <filter>`, respectively. These | |||||
These should be given as keys storing the method's base name paired with | 
values storing the corresponding :class:`.Node` type. For example, the | |||||
dict may contain the pair ``("templates", Template)``, which will | |||||
produce the methods :meth:`ifilter_templates` and | |||||
:meth:`filter_templates`, which are shortcuts for | |||||
:meth:`ifilter(forcetype=Template) <ifilter>` and | |||||
:meth:`filter(forcetype=Template) <filter>`, respectively. These | |||||
shortcuts are added to the class itself, with an appropriate docstring. | shortcuts are added to the class itself, with an appropriate docstring. | ||||
""" | """ | ||||
doc = """Iterate over {0}. | doc = """Iterate over {0}. | ||||
This is equivalent to :py:meth:`{1}` with *forcetype* set to | |||||
:py:class:`~{2.__module__}.{2.__name__}`. | |||||
This is equivalent to :meth:`{1}` with *forcetype* set to | |||||
:class:`~{2.__module__}.{2.__name__}`. | |||||
""" | """ | ||||
make_ifilter = lambda ftype: (lambda self, **kw: | |||||
self.ifilter(forcetype=ftype, **kw)) | |||||
make_filter = lambda ftype: (lambda self, **kw: | |||||
self.filter(forcetype=ftype, **kw)) | |||||
make_ifilter = lambda ftype: (lambda self, *a, **kw: | |||||
self.ifilter(forcetype=ftype, *a, **kw)) | |||||
make_filter = lambda ftype: (lambda self, *a, **kw: | |||||
self.filter(forcetype=ftype, *a, **kw)) | |||||
for name, ftype in (meths.items() if py3k else meths.iteritems()): | for name, ftype in (meths.items() if py3k else meths.iteritems()): | ||||
ifilter = make_ifilter(ftype) | ifilter = make_ifilter(ftype) | ||||
filter = make_filter(ftype) | filter = make_filter(ftype) | ||||
@@ -236,10 +240,10 @@ class Wikicode(StringMixIn): | |||||
@property | @property | ||||
def nodes(self): | def nodes(self): | ||||
"""A list of :py:class:`~.Node` objects. | |||||
"""A list of :class:`.Node` objects. | |||||
This is the internal data actually stored within a | |||||
:py:class:`~.Wikicode` object. | |||||
This is the internal data actually stored within a :class:`.Wikicode` | |||||
object. | |||||
""" | """ | ||||
return self._nodes | return self._nodes | ||||
@@ -256,11 +260,10 @@ class Wikicode(StringMixIn): | |||||
def set(self, index, value): | def set(self, index, value): | ||||
"""Set the ``Node`` at *index* to *value*. | """Set the ``Node`` at *index* to *value*. | ||||
Raises :py:exc:`IndexError` if *index* is out of range, or | |||||
:py:exc:`ValueError` if *value* cannot be coerced into one | |||||
:py:class:`~.Node`. To insert multiple nodes at an index, use | |||||
:py:meth:`get` with either :py:meth:`remove` and :py:meth:`insert` or | |||||
:py:meth:`replace`. | |||||
Raises :exc:`IndexError` if *index* is out of range, or | |||||
:exc:`ValueError` if *value* cannot be coerced into one :class:`.Node`. | |||||
To insert multiple nodes at an index, use :meth:`get` with either | |||||
:meth:`remove` and :meth:`insert` or :meth:`replace`. | |||||
""" | """ | ||||
nodes = parse_anything(value).nodes | nodes = parse_anything(value).nodes | ||||
if len(nodes) > 1: | if len(nodes) > 1: | ||||
@@ -275,7 +278,7 @@ class Wikicode(StringMixIn): | |||||
def index(self, obj, recursive=False): | def index(self, obj, recursive=False): | ||||
"""Return the index of *obj* in the list of nodes. | """Return the index of *obj* in the list of nodes. | ||||
Raises :py:exc:`ValueError` if *obj* is not found. If *recursive* is | |||||
Raises :exc:`ValueError` if *obj* is not found. If *recursive* is | |||||
``True``, we will look in all nodes of ours and their descendants, and | ``True``, we will look in all nodes of ours and their descendants, and | ||||
return the index of our direct descendant node within *our* list of | return the index of our direct descendant node within *our* list of | ||||
nodes. Otherwise, the lookup is done only on direct descendants. | nodes. Otherwise, the lookup is done only on direct descendants. | ||||
@@ -294,9 +297,8 @@ class Wikicode(StringMixIn): | |||||
def insert(self, index, value): | def insert(self, index, value): | ||||
"""Insert *value* at *index* in the list of nodes. | """Insert *value* at *index* in the list of nodes. | ||||
*value* can be anything parasable by :py:func:`.parse_anything`, which | |||||
includes strings or other :py:class:`~.Wikicode` or :py:class:`~.Node` | |||||
objects. | |||||
*value* can be anything parsable by :func:`.parse_anything`, which | |||||
includes strings or other :class:`.Wikicode` or :class:`.Node` objects. | |||||
""" | """ | ||||
nodes = parse_anything(value).nodes | nodes = parse_anything(value).nodes | ||||
for node in reversed(nodes): | for node in reversed(nodes): | ||||
@@ -305,15 +307,14 @@ class Wikicode(StringMixIn): | |||||
def insert_before(self, obj, value, recursive=True): | def insert_before(self, obj, value, recursive=True): | ||||
"""Insert *value* immediately before *obj*. | """Insert *value* immediately before *obj*. | ||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parasable by :py:func:`.parse_anything`. | |||||
If *recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | |||||
*obj* can be either a string, a :class:`.Node`, or another | |||||
:class:`.Wikicode` object (as created by :meth:`get_sections`, for | |||||
example). If *obj* is a string, we will operate on all instances of | |||||
that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parsable by :func:`.parse_anything`. If | |||||
*recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this :class:`.Wikicode` | |||||
object. If *obj* is not found, :exc:`ValueError` is raised. | |||||
""" | """ | ||||
if isinstance(obj, (Node, Wikicode)): | if isinstance(obj, (Node, Wikicode)): | ||||
context, index = self._do_strong_search(obj, recursive) | context, index = self._do_strong_search(obj, recursive) | ||||
@@ -329,15 +330,14 @@ class Wikicode(StringMixIn): | |||||
def insert_after(self, obj, value, recursive=True): | def insert_after(self, obj, value, recursive=True): | ||||
"""Insert *value* immediately after *obj*. | """Insert *value* immediately after *obj*. | ||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parasable by :py:func:`.parse_anything`. | |||||
If *recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | |||||
*obj* can be either a string, a :class:`.Node`, or another | |||||
:class:`.Wikicode` object (as created by :meth:`get_sections`, for | |||||
example). If *obj* is a string, we will operate on all instances of | |||||
that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parsable by :func:`.parse_anything`. If | |||||
*recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this :class:`.Wikicode` | |||||
object. If *obj* is not found, :exc:`ValueError` is raised. | |||||
""" | """ | ||||
if isinstance(obj, (Node, Wikicode)): | if isinstance(obj, (Node, Wikicode)): | ||||
context, index = self._do_strong_search(obj, recursive) | context, index = self._do_strong_search(obj, recursive) | ||||
@@ -353,15 +353,14 @@ class Wikicode(StringMixIn): | |||||
def replace(self, obj, value, recursive=True): | def replace(self, obj, value, recursive=True): | ||||
"""Replace *obj* with *value*. | """Replace *obj* with *value*. | ||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parasable by :py:func:`.parse_anything`. | |||||
*obj* can be either a string, a :class:`.Node`, or another | |||||
:class:`.Wikicode` object (as created by :meth:`get_sections`, for | |||||
example). If *obj* is a string, we will operate on all instances of | |||||
that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parsable by :func:`.parse_anything`. | |||||
If *recursive* is ``True``, we will try to find *obj* within our child | If *recursive* is ``True``, we will try to find *obj* within our child | ||||
nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | |||||
nodes even if it is not a direct descendant of this :class:`.Wikicode` | |||||
object. If *obj* is not found, :exc:`ValueError` is raised. | |||||
""" | """ | ||||
if isinstance(obj, (Node, Wikicode)): | if isinstance(obj, (Node, Wikicode)): | ||||
context, index = self._do_strong_search(obj, recursive) | context, index = self._do_strong_search(obj, recursive) | ||||
@@ -380,7 +379,7 @@ class Wikicode(StringMixIn): | |||||
def append(self, value): | def append(self, value): | ||||
"""Insert *value* at the end of the list of nodes. | """Insert *value* at the end of the list of nodes. | ||||
*value* can be anything parasable by :py:func:`.parse_anything`. | |||||
*value* can be anything parsable by :func:`.parse_anything`. | |||||
""" | """ | ||||
nodes = parse_anything(value).nodes | nodes = parse_anything(value).nodes | ||||
for node in nodes: | for node in nodes: | ||||
@@ -389,14 +388,14 @@ class Wikicode(StringMixIn): | |||||
def remove(self, obj, recursive=True): | def remove(self, obj, recursive=True): | ||||
"""Remove *obj* from the list of nodes. | """Remove *obj* from the list of nodes. | ||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
*obj* can be either a string, a :class:`.Node`, or another | |||||
:class:`.Wikicode` object (as created by :meth:`get_sections`, for | |||||
example). If *obj* is a string, we will operate on all instances of | |||||
that string within the code, otherwise only on the specific instance | |||||
given. If *recursive* is ``True``, we will try to find *obj* within our | given. If *recursive* is ``True``, we will try to find *obj* within our | ||||
child nodes even if it is not a direct descendant of this | child nodes even if it is not a direct descendant of this | ||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | |||||
:class:`.Wikicode` object. If *obj* is not found, :exc:`ValueError` is | |||||
raised. | |||||
""" | """ | ||||
if isinstance(obj, (Node, Wikicode)): | if isinstance(obj, (Node, Wikicode)): | ||||
context, index = self._do_strong_search(obj, recursive) | context, index = self._do_strong_search(obj, recursive) | ||||
@@ -413,10 +412,10 @@ class Wikicode(StringMixIn): | |||||
def matches(self, other): | def matches(self, other): | ||||
"""Do a loose equivalency test suitable for comparing page names. | """Do a loose equivalency test suitable for comparing page names. | ||||
*other* can be any string-like object, including | |||||
:py:class:`~.Wikicode`, or a tuple of these. This operation is | |||||
symmetric; both sides are adjusted. Specifically, whitespace and markup | |||||
is stripped and the first letter's case is normalized. Typical usage is | |||||
*other* can be any string-like object, including :class:`.Wikicode`, or | |||||
a tuple of these. This operation is symmetric; both sides are adjusted. | |||||
Specifically, whitespace and markup is stripped and the first letter's | |||||
case is normalized. Typical usage is | |||||
``if template.name.matches("stub"): ...``. | ``if template.name.matches("stub"): ...``. | ||||
""" | """ | ||||
cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] | cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] | ||||
@@ -435,35 +434,44 @@ class Wikicode(StringMixIn): | |||||
forcetype=None): | forcetype=None): | ||||
"""Iterate over nodes in our list matching certain conditions. | """Iterate over nodes in our list matching certain conditions. | ||||
If *recursive* is ``True``, we will iterate over our children and all | |||||
of their descendants, otherwise just our immediate children. If | |||||
*forcetype* is given, only nodes that are instances of this type are | |||||
yielded. *matches* can be used to further restrict the nodes, either as | |||||
a function (taking a single :py:class:`.Node` and returning a boolean) | |||||
or a regular expression (matched against the node's string | |||||
representation with :py:func:`re.search`). If *matches* is a regex, the | |||||
flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`, | |||||
:py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can | |||||
be specified by passing *flags*. | |||||
If *forcetype* is given, only nodes that are instances of this type (or | |||||
tuple of types) are yielded. Setting *recursive* to ``True`` will | |||||
iterate over all children and their descendants. ``RECURSE_OTHERS`` | |||||
will only iterate over children that are not instances of | 
*forcetype*. ``False`` will only iterate over immediate children. | |||||
``RECURSE_OTHERS`` can be used to iterate over all un-nested templates, | |||||
even if they are inside of HTML tags, like so: | |||||
>>> code = mwparserfromhell.parse("{{foo}}<b>{{foo|{{bar}}}}</b>") | |||||
>>> code.filter_templates(code.RECURSE_OTHERS) | |||||
["{{foo}}", "{{foo|{{bar}}}}"] | |||||
*matches* can be used to further restrict the nodes, either as a | |||||
function (taking a single :class:`.Node` and returning a boolean) or a | |||||
regular expression (matched against the node's string representation | |||||
with :func:`re.search`). If *matches* is a regex, the flags passed to | |||||
:func:`re.search` are :const:`re.IGNORECASE`, :const:`re.DOTALL`, and | |||||
:const:`re.UNICODE`, but custom flags can be specified by passing | |||||
*flags*. | |||||
""" | """ | ||||
return (node for i, node in | |||||
self._indexed_ifilter(recursive, matches, flags, forcetype)) | |||||
gen = self._indexed_ifilter(recursive, matches, flags, forcetype) | |||||
return (node for i, node in gen) | |||||
def filter(self, recursive=True, matches=None, flags=FLAGS, | |||||
forcetype=None): | |||||
def filter(self, *args, **kwargs): | |||||
"""Return a list of nodes within our list matching certain conditions. | """Return a list of nodes within our list matching certain conditions. | ||||
This is equivalent to calling :py:func:`list` on :py:meth:`ifilter`. | |||||
This is equivalent to calling :func:`list` on :meth:`ifilter`. | |||||
""" | """ | ||||
return list(self.ifilter(recursive, matches, flags, forcetype)) | |||||
return list(self.ifilter(*args, **kwargs)) | |||||
def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, | def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, | ||||
include_lead=None, include_headings=True): | include_lead=None, include_headings=True): | ||||
"""Return a list of sections within the page. | """Return a list of sections within the page. | ||||
Sections are returned as :py:class:`~.Wikicode` objects with a shared | |||||
node list (implemented using :py:class:`~.SmartList`) so that changes | |||||
to sections are reflected in the parent Wikicode object. | |||||
Sections are returned as :class:`.Wikicode` objects with a shared node | |||||
list (implemented using :class:`.SmartList`) so that changes to | |||||
sections are reflected in the parent Wikicode object. | |||||
Each section contains all of its subsections, unless *flat* is | Each section contains all of its subsections, unless *flat* is | ||||
``True``. If *levels* is given, it should be an iterable of integers; | ``True``. If *levels* is given, it should be an iterable of integers; | ||||
@@ -471,14 +479,13 @@ class Wikicode(StringMixIn): | |||||
*matches* is given, it should be either a function or a regex; only | *matches* is given, it should be either a function or a regex; only | ||||
sections whose headings match it (without the surrounding equal signs) | sections whose headings match it (without the surrounding equal signs) | ||||
will be included. *flags* can be used to override the default regex | will be included. *flags* can be used to override the default regex | ||||
flags (see :py:meth:`ifilter`) if a regex *matches* is used. | |||||
flags (see :meth:`ifilter`) if a regex *matches* is used. | |||||
If *include_lead* is ``True``, the first, lead section (without a | If *include_lead* is ``True``, the first, lead section (without a | ||||
heading) will be included in the list; ``False`` will not include it; | heading) will be included in the list; ``False`` will not include it; | ||||
the default will include it only if no specific *levels* were given. If | the default will include it only if no specific *levels* were given. If | ||||
*include_headings* is ``True``, the section's beginning | *include_headings* is ``True``, the section's beginning | ||||
:py:class:`~.Heading` object will be included; otherwise, this is | |||||
skipped. | |||||
:class:`.Heading` object will be included; otherwise, this is skipped. | |||||
""" | """ | ||||
title_matcher = self._build_matcher(matches, flags) | title_matcher = self._build_matcher(matches, flags) | ||||
matcher = lambda heading: (title_matcher(heading.title) and | matcher = lambda heading: (title_matcher(heading.title) and | ||||
@@ -527,7 +534,7 @@ class Wikicode(StringMixIn): | |||||
"""Return a rendered string without unprintable code such as templates. | """Return a rendered string without unprintable code such as templates. | ||||
The way a node is stripped is handled by the | The way a node is stripped is handled by the | ||||
:py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which | |||||
:meth:`~.Node.__strip__` method of :class:`.Node` objects, which | |||||
generally return a subset of their nodes or ``None``. For example, | generally return a subset of their nodes or ``None``. For example, | ||||
templates and tags are removed completely, links are stripped to just | templates and tags are removed completely, links are stripped to just | ||||
their display part, headings are stripped to just their title. If | their display part, headings are stripped to just their title. If | ||||
@@ -555,12 +562,12 @@ class Wikicode(StringMixIn): | |||||
"""Return a hierarchical tree representation of the object. | """Return a hierarchical tree representation of the object. | ||||
The representation is a string that makes the most sense printed. It is | The representation is a string that makes the most sense printed. It is | ||||
built by calling :py:meth:`_get_tree` on the | |||||
:py:class:`~.Wikicode` object and its children recursively. The end | |||||
result may look something like the following:: | |||||
built by calling :meth:`_get_tree` on the :class:`.Wikicode` object and | |||||
its children recursively. The end result may look something like the | |||||
following:: | |||||
>>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}" | >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}" | ||||
>>> print mwparserfromhell.parse(text).get_tree() | |||||
>>> print(mwparserfromhell.parse(text).get_tree()) | |||||
Lorem ipsum | Lorem ipsum | ||||
{{ | {{ | ||||
foo | foo | ||||
@@ -0,0 +1,170 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
""" | |||||
Tests for memory leaks in the CTokenizer. Python 2 and 3 compatible. | |||||
This appears to work mostly fine under Linux, but gives an absurd number of | |||||
false positives on OS X. I'm not sure why. Running the tests multiple times | |||||
yields different results (tests don't always leak, and the amount they leak by | |||||
varies). Increasing the number of loops results in a smaller bytes/loop value, | |||||
too, indicating the increase in memory usage might be due to something else. | |||||
Actual memory leaks typically leak very large amounts of memory (megabytes) | |||||
and scale with the number of loops. | |||||
""" | |||||
from __future__ import unicode_literals, print_function | |||||
from locale import LC_ALL, setlocale | |||||
from multiprocessing import Process, Pipe | |||||
from os import listdir, path | |||||
import sys | |||||
import psutil | |||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||||
# On Python 2, use the lazy xrange so the warm-up/main loops below don't
# materialize large lists.
if sys.version_info[0] == 2:
    range = xrange

# Number of tokenizations performed in the main measurement loop.
LOOPS = 10000
class Color(object):
    """ANSI terminal escape codes used to color the test status output."""
    GRAY = "\x1b[30;1m"    # bright black ("gray"): progress counters
    GREEN = "\x1b[92m"     # bright green: "OK"
    YELLOW = "\x1b[93m"    # bright yellow: "LEAKING"
    RESET = "\x1b[0m"      # reset all attributes
class MemoryTest(object):
    """Manages a memory test."""

    def __init__(self):
        # Populated by _load() with (test_function_name, input_text) pairs.
        self._tests = []
        self._load()

    def _parse_file(self, name, text):
        """Parse one .mwtest file and queue its test cases.

        *text* holds test cases separated by "\\n---\\n"; each case is a
        series of "key: value" lines. Only the "name" and "input" fields
        matter here — the expected output is irrelevant for leak detection.
        """
        tests = text.split("\n---\n")
        counter = 1
        digits = len(str(len(tests)))  # zero-pad test numbers to equal width
        for test in tests:
            data = {"name": None, "label": None, "input": None, "output": None}
            for line in test.strip().splitlines():
                if line.startswith("name:"):
                    data["name"] = line[len("name:"):].strip()
                elif line.startswith("label:"):
                    data["label"] = line[len("label:"):].strip()
                elif line.startswith("input:"):
                    raw = line[len("input:"):].strip()
                    if raw[0] == '"' and raw[-1] == '"':
                        raw = raw[1:-1]
                    # Round-trip through raw_unicode_escape/unicode_escape so
                    # escape sequences in the test file become real characters.
                    raw = raw.encode("raw_unicode_escape")
                    data["input"] = raw.decode("unicode_escape")
            number = str(counter).zfill(digits)
            fname = "test_{0}{1}_{2}".format(name, number, data["name"])
            self._tests.append((fname, data["input"]))
            counter += 1

    def _load(self):
        """Load test cases from the tests/tokenizer/*.mwtest files.

        If invoked as ``memtest.py --use file1 file2 ...``, only the named
        files are loaded; otherwise every .mwtest file in the directory is.
        """
        def load_file(filename):
            with open(filename, "rU") as fp:
                text = fp.read()
                if not py3k:
                    text = text.decode("utf8")
            # Strip the directory and the ".mwtest" extension.
            name = path.split(filename)[1][:0-len(extension)]
            self._parse_file(name, text)

        root = path.split(path.dirname(path.abspath(__file__)))[0]
        directory = path.join(root, "tests", "tokenizer")
        extension = ".mwtest"
        if len(sys.argv) > 2 and sys.argv[1] == "--use":
            for name in sys.argv[2:]:
                load_file(path.join(directory, name + extension))
            sys.argv = [sys.argv[0]]  # So unittest doesn't try to load these
        else:
            for filename in listdir(directory):
                if not filename.endswith(extension):
                    continue
                load_file(path.join(directory, filename))

    @staticmethod
    def _print_results(info1, info2):
        """Compare two memory-usage snapshots and print the verdict.

        A small allowance (*buff*) absorbs ordinary allocator noise; only
        RSS growth beyond it is reported as a leak.
        """
        r1, r2 = info1.rss, info2.rss
        buff = 8192
        if r2 - buff > r1:
            d = r2 - r1
            p = float(d) / r1
            bpt = d // LOOPS
            tmpl = "{0}LEAKING{1}: {2:n} bytes, {3:.2%} inc ({4:n} bytes/loop)"
            sys.stdout.write(tmpl.format(Color.YELLOW, Color.RESET, d, p, bpt))
        else:
            sys.stdout.write("{0}OK{1}".format(Color.GREEN, Color.RESET))

    def run(self):
        """Run the memory test suite."""
        width = 1
        for (name, _) in self._tests:
            if len(name) > width:
                width = len(name)
        tmpl = "{0}[{1:03}/{2}]{3} {4}: "
        for i, (name, text) in enumerate(self._tests, 1):
            sys.stdout.write(tmpl.format(Color.GRAY, i, len(self._tests),
                                         Color.RESET, name.ljust(width)))
            sys.stdout.flush()
            # The tokenizer runs in a child process; the send()/recv() pairs
            # below keep parent and child in lock-step so memory is sampled
            # after the child's warm-up loop and again after its main loop
            # (mirrors the protocol in _runner()).
            parent, child = Pipe()
            p = Process(target=_runner, args=(text, child))
            p.start()
            try:
                proc = psutil.Process(p.pid)
                parent.recv()           # child finished warm-up
                parent.send("OK")
                parent.recv()           # child idle: take first snapshot
                info1 = proc.get_memory_info()
                sys.stdout.flush()
                parent.send("OK")
                parent.recv()           # child finished main loop
                info2 = proc.get_memory_info()
                self._print_results(info1, info2)
                sys.stdout.flush()
                parent.send("OK")
            finally:
                proc.kill()
            print()
def _runner(text, child):
    """Child-process entry point: tokenize *text*, synchronizing via *child*.

    A short warm-up loop runs first so one-time allocations (caches,
    interned strings, etc.) are not counted as leaks; the main loop of
    LOOPS iterations follows. Each send()/recv() pair matches a
    recv()/send() in MemoryTest.run(), which takes memory snapshots
    between the phases.
    """
    r1, r2 = range(250), range(LOOPS)
    for i in r1:
        CTokenizer().tokenize(text)
    child.send("OK")   # warm-up done
    child.recv()
    child.send("OK")   # idle; parent takes first snapshot now
    child.recv()
    for i in r2:
        CTokenizer().tokenize(text)
    child.send("OK")   # main loop done; parent takes second snapshot
    child.recv()
if __name__ == "__main__":
    # Use the user's locale so the "{:n}" number formatting groups digits.
    setlocale(LC_ALL, "")
    MemoryTest().run()
@@ -0,0 +1,165 @@ | |||||
#! /usr/bin/env bash
# Release script for mwparserfromhell: bumps the version, updates the
# changelogs, tags/merges/pushes the release, uploads to PyPI, and then
# sanity-checks the published package in a throwaway virtualenv.

if [[ -z "$1" ]]; then
    echo "usage: $0 1.2.3"
    exit 1
fi

VERSION=$1
SCRIPT_DIR=$(dirname "$0")
RELEASE_DATE=$(date +"%B %d, %Y")
check_git() {
    # Refuse to release from a dirty tree or from any branch but develop,
    # then ask the operator for a final confirmation.
    if [[ -n "$(git status --porcelain --untracked-files=no)" ]]; then
        echo "Aborting: dirty working directory."
        exit 1
    fi
    if [[ "$(git rev-parse --abbrev-ref HEAD)" != "develop" ]]; then
        echo "Aborting: not on develop."
        exit 1
    fi
    echo -n "Are you absolutely ready to release? [yN] "
    read confirm
    # ${confirm,,} lowercases the reply (requires bash 4+).
    if [[ ${confirm,,} != "y" ]]; then
        exit 1
    fi
}
update_version() {
    echo -n "Updating mwparserfromhell.__version__..."
    # NOTE(review): `-i ""` is BSD/macOS sed syntax; GNU sed would need a
    # bare `-i` — this script appears to assume macOS. Confirm before
    # running elsewhere.
    sed -e 's/__version__ = .*/__version__ = "'$VERSION'"/' -i "" mwparserfromhell/__init__.py
    echo " done."
}
update_changelog() {
    filename="CHANGELOG"
    echo -n "Updating $filename..."
    # Replace the first line with the release header, e.g.
    # "v0.4 (released May 01, 2015):". Uses BSD sed's `-i ""`.
    sed -e '1s/.*/v'$VERSION' (released '$RELEASE_DATE'):/' -i "" $filename
    echo " done."
}
update_docs_changelog() {
    filename="docs/changelog.rst"
    echo -n "Updating $filename..."
    # Build an RST underline of dashes one character longer than "VERSION".
    dashes=$(seq 1 $(expr ${#VERSION} + 1) | sed 's/.*/-/' | tr -d '\n')
    # The previous release's version string sits on the line just above the
    # file's second "---" underline.
    previous_lineno=$(expr $(grep -n -e "^---" $filename | sed '2q;d' | cut -d ':' -f 1) - 1)
    previous_version=$(sed $previous_lineno'q;d' $filename)
    # Rewrite the header block (lines 4, 5, 7, 8) with the new version,
    # its underline, the release-tag link, and the compare-to-previous link.
    sed \
        -e '4s/.*/v'$VERSION \
        -e '5s/.*/'$dashes \
        -e '7s/.*/`Released '$RELEASE_DATE' <https:\/\/github.com\/earwig\/mwparserfromhell\/tree\/v'$VERSION'>`_/' \
        -e '8s/.*/(`changes <https:\/\/github.com\/earwig\/mwparserfromhell\/compare\/v'$previous_version'...v'$VERSION'>`__):/' \
        -i "" $filename
    echo " done."
}
do_git_stuff() {
    # Commit the version bump on develop, tag it (GPG-signed), merge into
    # master with an explicit merge commit, and push both branches + tags.
    echo -n "Git: committing, tagging, and merging release..."
    git commit -qam "release/$VERSION"
    git tag v$VERSION -s -m "version $VERSION"
    git checkout -q master
    git merge -q --no-ff develop -m "Merge branch 'develop'"
    echo -n " pushing..."
    git push -q --tags origin master
    git checkout -q develop
    git push -q origin develop
    echo " done."
}
upload_to_pypi() {
    # TODO: check whether these commands give output
    # Register and upload the GPG-signed sdist, then the built docs.
    echo -n "PyPI: uploading source tarball and docs..."
    python setup.py register sdist upload -s
    python setup.py upload_docs
    echo " done."
}
windows_build() {
    # Windows wheels can't be built from here: prompt the operator to run
    # scripts/win_build.py on a Windows machine, then wait for Enter.
    echo "PyPI: building/uploading Windows binaries..."
    echo "*** Run in Windows: ./scripts/win_build.py"
    echo "*** Press enter when done."
    read
}
post_release() {
    # Point the operator at the pages that should now reflect the release,
    # then pause before the automated sanity check.
    echo
    echo "*** Release completed."
    echo "*** Update: https://github.com/earwig/mwparserfromhell/releases/tag/v$VERSION"
    echo "*** Verify: https://pypi.python.org/pypi/mwparserfromhell"
    echo "*** Verify: https://mwparserfromhell.readthedocs.org"
    echo "*** Press enter to sanity-check the release."
    read
}
test_release() {
    # Verify the published release end-to-end: install from PyPI into a
    # throwaway virtualenv, check the reported version, verify the GPG
    # signature of the source tarball, and run the unit tests. On any
    # failure, tear down the virtualenv and abort.
    echo
    echo "Checking mwparserfromhell v$VERSION..."
    echo -n "Creating a virtualenv..."
    virtdir="mwparser-test-env"
    virtualenv -q $virtdir
    cd $virtdir
    source bin/activate
    echo " done."
    echo -n "Installing mwparserfromhell with pip..."
    pip -q install mwparserfromhell
    echo " done."
    echo -n "Checking version..."
    # Use print() so this works whether the virtualenv's python is 2 or 3;
    # the bare `print x` statement used before is a SyntaxError on Python 3.
    reported_version=$(python -c 'print(__import__("mwparserfromhell").__version__)')
    if [[ "$reported_version" != "$VERSION" ]]; then
        echo " error."
        echo "*** ERROR: mwparserfromhell is reporting its version as $reported_version, not $VERSION!"
        deactivate
        cd ..
        rm -rf $virtdir
        exit 1
    else
        echo " done."
    fi
    pip -q uninstall -y mwparserfromhell
    echo -n "Downloading mwparserfromhell source tarball and GPG signature..."
    curl -sL "https://pypi.python.org/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz" -o "mwparserfromhell.tar.gz"
    curl -sL "https://pypi.python.org/packages/source/m/mwparserfromhell/mwparserfromhell-$VERSION.tar.gz.asc" -o "mwparserfromhell.tar.gz.asc"
    echo " done."
    echo "Verifying tarball..."
    gpg --verify mwparserfromhell.tar.gz.asc
    if [[ "$?" != "0" ]]; then
        echo "*** ERROR: GPG signature verification failed!"
        deactivate
        cd ..
        rm -rf $virtdir
        exit 1
    fi
    tar -xf mwparserfromhell.tar.gz
    rm mwparserfromhell.tar.gz mwparserfromhell.tar.gz.asc
    cd mwparserfromhell-$VERSION
    echo "Running unit tests..."
    python setup.py -q test
    if [[ "$?" != "0" ]]; then
        echo "*** ERROR: Unit tests failed!"
        deactivate
        cd ../..
        rm -rf $virtdir
        exit 1
    fi
    echo -n "Everything looks good. Cleaning up..."
    deactivate
    cd ../..
    rm -rf $virtdir
    echo " done."
}
# Main sequence: validate repo state, write the new version everywhere,
# tag/merge/push, upload, build Windows binaries, then verify the release.
echo "Preparing mwparserfromhell v$VERSION..."
cd "$SCRIPT_DIR/.."

check_git
update_version
update_changelog
update_docs_changelog
do_git_stuff
upload_to_pypi
windows_build
post_release
test_release

echo "All done."
exit 0
@@ -0,0 +1,58 @@ | |||||
# Build requirements: | |||||
# | |||||
# Python 2.6-3.2: Visual C++ Express Edition 2008: | |||||
# http://go.microsoft.com/?linkid=7729279 | |||||
# | |||||
# Python 3.3+: Visual C++ Express Edition 2010: | |||||
# http://go.microsoft.com/?linkid=9709949 | |||||
# | |||||
# x64 builds: Microsoft Windows SDK for Windows 7 and .NET Framework 3.5 SP1: | |||||
# http://www.microsoft.com/en-us/download/details.aspx?id=3138 | |||||
# | |||||
# Python interpreter, 2.6, 2.7, 3.2-3.4: | |||||
# https://www.python.org/downloads/ | |||||
# | |||||
# Pip, setuptools, wheel: | |||||
# https://bootstrap.pypa.io/get-pip.py | |||||
# and run *for each* Python version: | |||||
# c:\pythonXX\python get-pip.py | |||||
# c:\pythonXX\scripts\pip install wheel | |||||
# | |||||
# Afterwards, run this script with any of the python interpreters (2.7 suggested) | |||||
from __future__ import print_function | |||||
import os | |||||
from subprocess import call, STDOUT | |||||
ENVIRONMENTS = ["26", "27", "32", "33", "34"] | |||||
def run(pyver, cmds):
    """Run ``setup.py`` with *cmds* under the given Python version.

    Process output is captured to a per-command log file named after the
    first command and the version (e.g. ``test27.log``); prints ``[OK]``
    or ``[FAILED (code)]`` and returns the process's exit status.
    """
    interpreter = r"C:\Python%s\Python.exe" % pyver
    full_cmd = [interpreter, "setup.py"] + cmds
    print(" ".join(full_cmd), end=" ")
    logname = "%s%s.log" % (cmds[0], pyver)
    with open(logname, "w") as logfile:
        status = call(full_cmd, stdout=logfile, stderr=STDOUT, cwd="..")
    if status == 0:
        print("[OK]")
    else:
        print("[FAILED (%i)]" % status)
    return status
def main():
    """Build and upload wheels under every interpreter in ENVIRONMENTS."""
    script_dir = os.path.split(__file__)[0]
    if script_dir:
        os.chdir(script_dir)
    print("Building Windows wheels for Python %s:" % ", ".join(ENVIRONMENTS))
    for version in ENVIRONMENTS:
        print()
        # Remove any stale compiled tokenizer so each build starts clean.
        try:
            os.unlink("mwparserfromhell/parser/_tokenizer.pyd")
        except OSError:
            pass
        # Only upload if the test suite passes under this interpreter.
        if run(version, ["test"]) == 0:
            run(version, ["bdist_wheel", "upload"])  # TODO: add "-s" to GPG sign
# Run the build only when executed directly (not on import).
if __name__ == "__main__":
    main()
@@ -1,7 +1,7 @@ | |||||
#! /usr/bin/env python | #! /usr/bin/env python | ||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,24 +21,85 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import os | |||||
import sys | import sys | ||||
if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ | if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ | ||||
(sys.version_info[1] == 3 and sys.version_info[1] < 2): | (sys.version_info[1] == 3 and sys.version_info[1] < 2): | ||||
raise Exception('mwparserfromhell needs Python 2.6+ or 3.2+') | |||||
raise Exception("mwparserfromhell needs Python 2.6+ or 3.2+") | |||||
if sys.version_info >= (3, 0): | |||||
basestring = (str, ) | |||||
from setuptools import setup, find_packages, Extension | from setuptools import setup, find_packages, Extension | ||||
from mwparserfromhell import __version__ | from mwparserfromhell import __version__ | ||||
from mwparserfromhell.compat import py26, py3k | from mwparserfromhell.compat import py26, py3k | ||||
with open("README.rst") as fp: | |||||
with open("README.rst", **{'encoding':'utf-8'} if py3k else {}) as fp: | |||||
long_docs = fp.read() | long_docs = fp.read() | ||||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | tokenizer = Extension("mwparserfromhell.parser._tokenizer", | ||||
sources = ["mwparserfromhell/parser/tokenizer.c"]) | |||||
sources=["mwparserfromhell/parser/tokenizer.c"], | |||||
depends=["mwparserfromhell/parser/tokenizer.h"]) | |||||
use_extension=True | |||||
# Allow env var WITHOUT_EXTENSION and args --with[out]-extension | |||||
if '--without-extension' in sys.argv: | |||||
use_extension = False | |||||
elif '--with-extension' in sys.argv: | |||||
pass | |||||
elif os.environ.get('WITHOUT_EXTENSION', '0') == '1': | |||||
use_extension = False | |||||
# Remove the command line argument as it isn't understood by | |||||
# setuptools/distutils | |||||
sys.argv = [arg for arg in sys.argv | |||||
if not arg.startswith('--with') | |||||
and not arg.endswith('-extension')] | |||||
def optional_compile_setup(func=setup, use_ext=use_extension, | |||||
*args, **kwargs): | |||||
""" | |||||
Wrap setup to allow optional compilation of extensions. | |||||
Falls back to pure python mode (no extensions) | |||||
if compilation of extensions fails. | |||||
""" | |||||
extensions = kwargs.get('ext_modules', None) | |||||
if use_ext and extensions: | |||||
try: | |||||
func(*args, **kwargs) | |||||
return | |||||
except SystemExit as e: | |||||
assert(e.args) | |||||
if e.args[0] is False: | |||||
raise | |||||
elif isinstance(e.args[0], basestring): | |||||
if e.args[0].startswith('usage: '): | |||||
raise | |||||
else: | |||||
# Fallback to pure python mode | |||||
print('setup with extension failed: %s' % repr(e)) | |||||
pass | |||||
except Exception as e: | |||||
print('setup with extension failed: %s' % repr(e)) | |||||
if extensions: | |||||
if use_ext: | |||||
print('Falling back to pure python mode.') | |||||
else: | |||||
print('Using pure python mode.') | |||||
del kwargs['ext_modules'] | |||||
func(*args, **kwargs) | |||||
setup( | |||||
optional_compile_setup( | |||||
name = "mwparserfromhell", | name = "mwparserfromhell", | ||||
packages = find_packages(exclude=("tests",)), | packages = find_packages(exclude=("tests",)), | ||||
ext_modules = [tokenizer], | ext_modules = [tokenizer], | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,11 +21,13 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import print_function, unicode_literals | from __future__ import print_function, unicode_literals | ||||
import codecs | |||||
from os import listdir, path | from os import listdir, path | ||||
import sys | import sys | ||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.compat import py3k, str | |||||
from mwparserfromhell.parser import tokens | from mwparserfromhell.parser import tokens | ||||
from mwparserfromhell.parser.builder import Builder | |||||
class _TestParseError(Exception): | class _TestParseError(Exception): | ||||
"""Raised internally when a test could not be parsed.""" | """Raised internally when a test could not be parsed.""" | ||||
@@ -49,8 +51,12 @@ class TokenizerTestCase(object): | |||||
*label* for the method's docstring. | *label* for the method's docstring. | ||||
""" | """ | ||||
def inner(self): | def inner(self): | ||||
expected = data["output"] | |||||
actual = self.tokenizer().tokenize(data["input"]) | |||||
if hasattr(self, "roundtrip"): | |||||
expected = data["input"] | |||||
actual = str(Builder().build(data["output"][:])) | |||||
else: | |||||
expected = data["output"] | |||||
actual = self.tokenizer().tokenize(data["input"]) | |||||
self.assertEqual(expected, actual) | self.assertEqual(expected, actual) | ||||
if not py3k: | if not py3k: | ||||
inner.__name__ = funcname.encode("utf8") | inner.__name__ = funcname.encode("utf8") | ||||
@@ -109,10 +115,8 @@ class TokenizerTestCase(object): | |||||
def build(cls): | def build(cls): | ||||
"""Load and install all tests from the 'tokenizer' directory.""" | """Load and install all tests from the 'tokenizer' directory.""" | ||||
def load_file(filename): | def load_file(filename): | ||||
with open(filename, "rU") as fp: | |||||
with codecs.open(filename, "rU", encoding="utf8") as fp: | |||||
text = fp.read() | text = fp.read() | ||||
if not py3k: | |||||
text = text.decode("utf8") | |||||
name = path.split(filename)[1][:0-len(extension)] | name = path.split(filename)[1][:0-len(extension)] | ||||
cls._load_tests(filename, name, text) | cls._load_tests(filename, name, text) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -98,7 +98,7 @@ class TreeEqualityTestCase(TestCase): | |||||
self.assertWikicodeEqual(exp_attr.name, act_attr.name) | self.assertWikicodeEqual(exp_attr.name, act_attr.name) | ||||
if exp_attr.value is not None: | if exp_attr.value is not None: | ||||
self.assertWikicodeEqual(exp_attr.value, act_attr.value) | self.assertWikicodeEqual(exp_attr.value, act_attr.value) | ||||
self.assertIs(exp_attr.quoted, act_attr.quoted) | |||||
self.assertEqual(exp_attr.quotes, act_attr.quotes) | |||||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | ||||
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | ||||
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -42,12 +42,14 @@ class TestAttribute(TreeEqualityTestCase): | |||||
self.assertEqual(" foo", str(node)) | self.assertEqual(" foo", str(node)) | ||||
node2 = Attribute(wraptext("foo"), wraptext("bar")) | node2 = Attribute(wraptext("foo"), wraptext("bar")) | ||||
self.assertEqual(' foo="bar"', str(node2)) | self.assertEqual(' foo="bar"', str(node2)) | ||||
node3 = Attribute(wraptext("a"), wraptext("b"), True, "", " ", " ") | |||||
node3 = Attribute(wraptext("a"), wraptext("b"), '"', "", " ", " ") | |||||
self.assertEqual('a = "b"', str(node3)) | self.assertEqual('a = "b"', str(node3)) | ||||
node3 = Attribute(wraptext("a"), wraptext("b"), False, "", " ", " ") | |||||
self.assertEqual("a = b", str(node3)) | |||||
node4 = Attribute(wraptext("a"), wrap([]), False, " ", "", " ") | |||||
self.assertEqual(" a= ", str(node4)) | |||||
node4 = Attribute(wraptext("a"), wraptext("b"), "'", "", " ", " ") | |||||
self.assertEqual("a = 'b'", str(node4)) | |||||
node5 = Attribute(wraptext("a"), wraptext("b"), None, "", " ", " ") | |||||
self.assertEqual("a = b", str(node5)) | |||||
node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ") | |||||
self.assertEqual(" a= ", str(node6)) | |||||
def test_name(self): | def test_name(self): | ||||
"""test getter/setter for the name attribute""" | """test getter/setter for the name attribute""" | ||||
@@ -66,17 +68,35 @@ class TestAttribute(TreeEqualityTestCase): | |||||
self.assertWikicodeEqual(wrap([Template(wraptext("bar"))]), node.value) | self.assertWikicodeEqual(wrap([Template(wraptext("bar"))]), node.value) | ||||
node.value = None | node.value = None | ||||
self.assertIs(None, node.value) | self.assertIs(None, node.value) | ||||
node2 = Attribute(wraptext("id"), wraptext("foo"), None) | |||||
node2.value = "foo bar baz" | |||||
self.assertWikicodeEqual(wraptext("foo bar baz"), node2.value) | |||||
self.assertEqual('"', node2.quotes) | |||||
node2.value = 'foo "bar" baz' | |||||
self.assertWikicodeEqual(wraptext('foo "bar" baz'), node2.value) | |||||
self.assertEqual("'", node2.quotes) | |||||
node2.value = "foo 'bar' baz" | |||||
self.assertWikicodeEqual(wraptext("foo 'bar' baz"), node2.value) | |||||
self.assertEqual('"', node2.quotes) | |||||
node2.value = "fo\"o 'bar' b\"az" | |||||
self.assertWikicodeEqual(wraptext("fo\"o 'bar' b\"az"), node2.value) | |||||
self.assertEqual('"', node2.quotes) | |||||
def test_quoted(self): | |||||
"""test getter/setter for the quoted attribute""" | |||||
node1 = Attribute(wraptext("id"), wraptext("foo"), False) | |||||
def test_quotes(self): | |||||
"""test getter/setter for the quotes attribute""" | |||||
node1 = Attribute(wraptext("id"), wraptext("foo"), None) | |||||
node2 = Attribute(wraptext("id"), wraptext("bar")) | node2 = Attribute(wraptext("id"), wraptext("bar")) | ||||
self.assertFalse(node1.quoted) | |||||
self.assertTrue(node2.quoted) | |||||
node1.quoted = True | |||||
node2.quoted = "" | |||||
self.assertTrue(node1.quoted) | |||||
self.assertFalse(node2.quoted) | |||||
node3 = Attribute(wraptext("id"), wraptext("foo bar baz")) | |||||
self.assertIs(None, node1.quotes) | |||||
self.assertEqual('"', node2.quotes) | |||||
node1.quotes = "'" | |||||
node2.quotes = None | |||||
self.assertEqual("'", node1.quotes) | |||||
self.assertIs(None, node2.quotes) | |||||
self.assertRaises(ValueError, setattr, node1, "quotes", "foobar") | |||||
self.assertRaises(ValueError, setattr, node3, "quotes", None) | |||||
self.assertRaises(ValueError, Attribute, wraptext("id"), | |||||
wraptext("foo bar baz"), None) | |||||
def test_padding(self): | def test_padding(self): | ||||
"""test getter/setter for the padding attributes""" | """test getter/setter for the padding attributes""" | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -27,10 +27,11 @@ try: | |||||
except ImportError: | except ImportError: | ||||
import unittest | import unittest | ||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | ||||
HTMLEntity, Tag, Template, Text, Wikilink) | HTMLEntity, Tag, Template, Text, Wikilink) | ||||
from mwparserfromhell.nodes.extras import Attribute, Parameter | from mwparserfromhell.nodes.extras import Attribute, Parameter | ||||
from mwparserfromhell.parser import tokens | |||||
from mwparserfromhell.parser import tokens, ParserError | |||||
from mwparserfromhell.parser.builder import Builder | from mwparserfromhell.parser.builder import Builder | ||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | ||||
@@ -269,7 +270,7 @@ class TestBuilder(TreeEqualityTestCase): | |||||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | ||||
pad_after_eq=""), | pad_after_eq=""), | ||||
tokens.Text(text="name"), tokens.TagAttrEquals(), | tokens.Text(text="name"), tokens.TagAttrEquals(), | ||||
tokens.TagAttrQuote(), tokens.Text(text="abc"), | |||||
tokens.TagAttrQuote(char='"'), tokens.Text(text="abc"), | |||||
tokens.TagCloseSelfclose(padding=" ")], | tokens.TagCloseSelfclose(padding=" ")], | ||||
wrap([Tag(wraptext("ref"), | wrap([Tag(wraptext("ref"), | ||||
attrs=[Attribute(wraptext("name"), wraptext("abc"))], | attrs=[Attribute(wraptext("name"), wraptext("abc"))], | ||||
@@ -297,7 +298,7 @@ class TestBuilder(TreeEqualityTestCase): | |||||
wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])), | wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])), | ||||
# <ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} | # <ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} | ||||
# mno = "{{p}} [[q]] {{r}}">[[Source]]</ref> | |||||
# mno = '{{p}} [[q]] {{r}}'>[[Source]]</ref> | |||||
([tokens.TagOpenOpen(), tokens.Text(text="ref"), | ([tokens.TagOpenOpen(), tokens.Text(text="ref"), | ||||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | ||||
pad_after_eq=""), | pad_after_eq=""), | ||||
@@ -307,7 +308,7 @@ class TestBuilder(TreeEqualityTestCase): | |||||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | ||||
pad_after_eq=""), | pad_after_eq=""), | ||||
tokens.Text(text="foo"), tokens.TagAttrEquals(), | tokens.Text(text="foo"), tokens.TagAttrEquals(), | ||||
tokens.TagAttrQuote(), tokens.Text(text="bar "), | |||||
tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "), | |||||
tokens.TemplateOpen(), tokens.Text(text="baz"), | tokens.TemplateOpen(), tokens.Text(text="baz"), | ||||
tokens.TemplateClose(), | tokens.TemplateClose(), | ||||
tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | tokens.TagAttrStart(pad_first=" ", pad_before_eq="", | ||||
@@ -325,7 +326,7 @@ class TestBuilder(TreeEqualityTestCase): | |||||
tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ", | tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ", | ||||
pad_after_eq=" "), | pad_after_eq=" "), | ||||
tokens.Text(text="mno"), tokens.TagAttrEquals(), | tokens.Text(text="mno"), tokens.TagAttrEquals(), | ||||
tokens.TagAttrQuote(), tokens.TemplateOpen(), | |||||
tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(), | |||||
tokens.Text(text="p"), tokens.TemplateClose(), | tokens.Text(text="p"), tokens.TemplateClose(), | ||||
tokens.Text(text=" "), tokens.WikilinkOpen(), | tokens.Text(text=" "), tokens.WikilinkOpen(), | ||||
tokens.Text(text="q"), tokens.WikilinkClose(), | tokens.Text(text="q"), tokens.WikilinkClose(), | ||||
@@ -337,17 +338,17 @@ class TestBuilder(TreeEqualityTestCase): | |||||
tokens.TagCloseClose()], | tokens.TagCloseClose()], | ||||
wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ | wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [ | ||||
Attribute(wraptext("name"), | Attribute(wraptext("name"), | ||||
wrap([Template(wraptext("abc"))]), False), | |||||
wrap([Template(wraptext("abc"))]), None), | |||||
Attribute(wraptext("foo"), wrap([Text("bar "), | Attribute(wraptext("foo"), wrap([Text("bar "), | ||||
Template(wraptext("baz"))]), pad_first=" "), | Template(wraptext("baz"))]), pad_first=" "), | ||||
Attribute(wraptext("abc"), wrap([Template(wraptext("de")), | Attribute(wraptext("abc"), wrap([Template(wraptext("de")), | ||||
Text("f")]), False), | |||||
Text("f")]), None), | |||||
Attribute(wraptext("ghi"), wrap([Text("j"), | Attribute(wraptext("ghi"), wrap([Text("j"), | ||||
Template(wraptext("k")), | Template(wraptext("k")), | ||||
Template(wraptext("l"))]), False), | |||||
Template(wraptext("l"))]), None), | |||||
Attribute(wraptext("mno"), wrap([Template(wraptext("p")), | Attribute(wraptext("mno"), wrap([Template(wraptext("p")), | ||||
Text(" "), Wikilink(wraptext("q")), Text(" "), | Text(" "), Wikilink(wraptext("q")), Text(" "), | ||||
Template(wraptext("r"))]), True, " \n ", " ", | |||||
Template(wraptext("r"))]), "'", " \n ", " ", | |||||
" ")])])), | " ")])])), | ||||
# "''italic text''" | # "''italic text''" | ||||
@@ -420,5 +421,22 @@ class TestBuilder(TreeEqualityTestCase): | |||||
named=True)]))])]) | named=True)]))])]) | ||||
self.assertWikicodeEqual(valid, self.builder.build(test)) | self.assertWikicodeEqual(valid, self.builder.build(test)) | ||||
def test_parser_errors(self): | |||||
"""test whether ParserError gets thrown for bad input""" | |||||
missing_closes = [ | |||||
[tokens.TemplateOpen(), tokens.TemplateParamSeparator()], | |||||
[tokens.TemplateOpen()], [tokens.ArgumentOpen()], | |||||
[tokens.WikilinkOpen()], [tokens.ExternalLinkOpen()], | |||||
[tokens.HeadingStart()], [tokens.CommentStart()], | |||||
[tokens.TagOpenOpen(), tokens.TagAttrStart()], | |||||
[tokens.TagOpenOpen()] | |||||
] | |||||
func = self.assertRaisesRegex if py3k else self.assertRaisesRegexp | |||||
msg = r"_handle_token\(\) got unexpected TemplateClose" | |||||
func(ParserError, msg, self.builder.build, [tokens.TemplateClose()]) | |||||
for test in missing_closes: | |||||
self.assertRaises(ParserError, self.builder.build, test) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -22,6 +22,7 @@ | |||||
from __future__ import print_function, unicode_literals | from __future__ import print_function, unicode_literals | ||||
import json | import json | ||||
import os | |||||
try: | try: | ||||
import unittest2 as unittest | import unittest2 as unittest | ||||
@@ -111,6 +112,7 @@ class TestDocs(unittest.TestCase): | |||||
self.assertPrint(text, res) | self.assertPrint(text, res) | ||||
self.assertEqual(text, code) | self.assertEqual(text, code) | ||||
@unittest.skipIf("NOWEB" in os.environ, "web test disabled by environ var") | |||||
def test_readme_5(self): | def test_readme_5(self): | ||||
"""test a block of example code in the README; includes a web call""" | """test a block of example code in the README; includes a web call""" | ||||
url1 = "http://en.wikipedia.org/w/api.php" | url1 = "http://en.wikipedia.org/w/api.php" | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -108,6 +108,7 @@ class TestHTMLEntity(TreeEqualityTestCase): | |||||
self.assertRaises(ValueError, setattr, node3, "value", -1) | self.assertRaises(ValueError, setattr, node3, "value", -1) | ||||
self.assertRaises(ValueError, setattr, node1, "value", 110000) | self.assertRaises(ValueError, setattr, node1, "value", 110000) | ||||
self.assertRaises(ValueError, setattr, node1, "value", "1114112") | self.assertRaises(ValueError, setattr, node1, "value", "1114112") | ||||
self.assertRaises(ValueError, setattr, node1, "value", "12FFFF") | |||||
def test_named(self): | def test_named(self): | ||||
"""test getter/setter for the named attribute""" | """test getter/setter for the named attribute""" | ||||
@@ -163,10 +164,14 @@ class TestHTMLEntity(TreeEqualityTestCase): | |||||
node2 = HTMLEntity("107") | node2 = HTMLEntity("107") | ||||
node3 = HTMLEntity("e9") | node3 = HTMLEntity("e9") | ||||
node4 = HTMLEntity("1f648") | node4 = HTMLEntity("1f648") | ||||
node5 = HTMLEntity("-2") | |||||
node6 = HTMLEntity("110000", named=False, hexadecimal=True) | |||||
self.assertEqual("\xa0", node1.normalize()) | self.assertEqual("\xa0", node1.normalize()) | ||||
self.assertEqual("k", node2.normalize()) | self.assertEqual("k", node2.normalize()) | ||||
self.assertEqual("é", node3.normalize()) | self.assertEqual("é", node3.normalize()) | ||||
self.assertEqual("\U0001F648", node4.normalize()) | self.assertEqual("\U0001F648", node4.normalize()) | ||||
self.assertRaises(ValueError, node5.normalize) | |||||
self.assertRaises(ValueError, node6.normalize) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -71,9 +71,10 @@ class TestParameter(TreeEqualityTestCase): | |||||
self.assertFalse(node1.showkey) | self.assertFalse(node1.showkey) | ||||
self.assertTrue(node2.showkey) | self.assertTrue(node2.showkey) | ||||
node1.showkey = True | node1.showkey = True | ||||
node2.showkey = "" | |||||
self.assertTrue(node1.showkey) | self.assertTrue(node1.showkey) | ||||
self.assertFalse(node2.showkey) | |||||
node1.showkey = "" | |||||
self.assertFalse(node1.showkey) | |||||
self.assertRaises(ValueError, setattr, node2, "showkey", False) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -0,0 +1,41 @@ | |||||
# -*- coding: utf-8 -*- | |||||
# | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | |||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
# of this software and associated documentation files (the "Software"), to deal | |||||
# in the Software without restriction, including without limitation the rights | |||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
# copies of the Software, and to permit persons to whom the Software is | |||||
# furnished to do so, subject to the following conditions: | |||||
# | |||||
# The above copyright notice and this permission notice shall be included in | |||||
# all copies or substantial portions of the Software. | |||||
# | |||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
# SOFTWARE. | |||||
from __future__ import unicode_literals | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from ._test_tokenizer import TokenizerTestCase | |||||
class TestRoundtripping(TokenizerTestCase, unittest.TestCase): | |||||
"""Test cases for roundtripping tokens back to wikitext.""" | |||||
@classmethod | |||||
def setUpClass(cls): | |||||
cls.roundtrip = True | |||||
if __name__ == "__main__": | |||||
unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -88,6 +88,10 @@ class TestSmartList(unittest.TestCase): | |||||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) | self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) | ||||
self.assertRaises(ValueError, assign, list2, 0, 5, 2, | self.assertRaises(ValueError, assign, list2, 0, 5, 2, | ||||
[100, 102, 104, 106]) | [100, 102, 104, 106]) | ||||
with self.assertRaises(IndexError): | |||||
list2[7] = "foo" | |||||
with self.assertRaises(IndexError): | |||||
list2[-8] = "foo" | |||||
del list2[2] | del list2[2] | ||||
self.assertEqual([0, 1, 3, 4, 5, 6], list2) | self.assertEqual([0, 1, 3, 4, 5, 6], list2) | ||||
@@ -271,6 +275,13 @@ class TestSmartList(unittest.TestCase): | |||||
list3.sort(key=lambda i: i[1], reverse=True) | list3.sort(key=lambda i: i[1], reverse=True) | ||||
self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) | self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) | ||||
def _dispatch_test_for_children(self, meth): | |||||
"""Run a test method on various different types of children.""" | |||||
meth(lambda L: SmartList(list(L))[:]) | |||||
meth(lambda L: SmartList([999] + list(L))[1:]) | |||||
meth(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]) | |||||
def test_docs(self): | def test_docs(self): | ||||
"""make sure the methods of SmartList/_ListProxy have docstrings""" | """make sure the methods of SmartList/_ListProxy have docstrings""" | ||||
methods = ["append", "count", "extend", "index", "insert", "pop", | methods = ["append", "count", "extend", "index", "insert", "pop", | ||||
@@ -300,8 +311,8 @@ class TestSmartList(unittest.TestCase): | |||||
"""make sure SmartList's add/radd/iadd work""" | """make sure SmartList's add/radd/iadd work""" | ||||
self._test_add_radd_iadd(SmartList) | self._test_add_radd_iadd(SmartList) | ||||
def test_parent_unaffected_magics(self): | |||||
"""sanity checks against SmartList features that were not modified""" | |||||
def test_parent_other_magics(self): | |||||
"""make sure SmartList's other magically implemented features work""" | |||||
self._test_other_magic_methods(SmartList) | self._test_other_magic_methods(SmartList) | ||||
def test_parent_methods(self): | def test_parent_methods(self): | ||||
@@ -310,41 +321,29 @@ class TestSmartList(unittest.TestCase): | |||||
def test_child_get_set_del(self): | def test_child_get_set_del(self): | ||||
"""make sure _ListProxy's getitem/setitem/delitem work""" | """make sure _ListProxy's getitem/setitem/delitem work""" | ||||
self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) | |||||
self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_get_set_del_item(builder) | |||||
self._dispatch_test_for_children(self._test_get_set_del_item) | |||||
def test_child_add(self): | def test_child_add(self): | ||||
"""make sure _ListProxy's add/radd/iadd work""" | """make sure _ListProxy's add/radd/iadd work""" | ||||
self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) | |||||
self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_add_radd_iadd(builder) | |||||
self._dispatch_test_for_children(self._test_add_radd_iadd) | |||||
def test_child_other_magics(self): | def test_child_other_magics(self): | ||||
"""make sure _ListProxy's other magically implemented features work""" | """make sure _ListProxy's other magically implemented features work""" | ||||
self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) | |||||
self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_other_magic_methods(builder) | |||||
self._dispatch_test_for_children(self._test_other_magic_methods) | |||||
def test_child_methods(self): | def test_child_methods(self): | ||||
"""make sure _ListProxy's non-magic methods work, like append()""" | """make sure _ListProxy's non-magic methods work, like append()""" | ||||
self._test_list_methods(lambda L: SmartList(list(L))[:]) | |||||
self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) | |||||
self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||||
self._test_list_methods(builder) | |||||
self._dispatch_test_for_children(self._test_list_methods) | |||||
def test_influence(self): | def test_influence(self): | ||||
"""make sure changes are propagated from parents to children""" | """make sure changes are propagated from parents to children""" | ||||
parent = SmartList([0, 1, 2, 3, 4, 5]) | parent = SmartList([0, 1, 2, 3, 4, 5]) | ||||
child1 = parent[2:] | child1 = parent[2:] | ||||
child2 = parent[2:5] | child2 = parent[2:5] | ||||
self.assertEqual([0, 1, 2, 3, 4, 5], parent) | |||||
self.assertEqual([2, 3, 4, 5], child1) | |||||
self.assertEqual([2, 3, 4], child2) | |||||
self.assertEqual(2, len(parent._children)) | |||||
parent.append(6) | parent.append(6) | ||||
child1.append(7) | child1.append(7) | ||||
@@ -390,5 +389,28 @@ class TestSmartList(unittest.TestCase): | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | ||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | ||||
child1.detach() | |||||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | |||||
self.assertEqual(1, len(parent._children)) | |||||
parent.remove(1.9) | |||||
parent.remove(1.8) | |||||
self.assertEqual([1, 4, 3, 2, 5, 6, 7, 8, 8.1, 8.2], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([4, 3, 2], child2) | |||||
parent.reverse() | |||||
self.assertEqual([8.2, 8.1, 8, 7, 6, 5, 2, 3, 4, 1], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([4, 3, 2], child2) | |||||
self.assertEqual(0, len(parent._children)) | |||||
child2.detach() | |||||
self.assertEqual([8.2, 8.1, 8, 7, 6, 5, 2, 3, 4, 1], parent) | |||||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||||
self.assertEqual([4, 3, 2], child2) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -33,9 +33,10 @@ from mwparserfromhell.nodes.extras import Attribute | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | ||||
agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) | agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) | ||||
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) | |||||
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, True, a, b, c) | |||||
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, True, a, b, c) | |||||
agennv = lambda name: Attribute(wraptext(name)) | |||||
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None) | |||||
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c) | |||||
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) | |||||
class TestTag(TreeEqualityTestCase): | class TestTag(TreeEqualityTestCase): | ||||
"""Test cases for the Tag node.""" | """Test cases for the Tag node.""" | ||||
@@ -74,10 +75,10 @@ class TestTag(TreeEqualityTestCase): | |||||
node1 = Tag(wraptext("ref"), wraptext("foobar")) | node1 = Tag(wraptext("ref"), wraptext("foobar")) | ||||
# '''bold text''' | # '''bold text''' | ||||
node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") | node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") | ||||
# <img id="foo" class="bar" /> | |||||
# <img id="foo" class="bar" selected /> | |||||
node3 = Tag(wraptext("img"), | node3 = Tag(wraptext("img"), | ||||
attrs=[Attribute(wraptext("id"), wraptext("foo")), | |||||
Attribute(wraptext("class"), wraptext("bar"))], | |||||
attrs=[agen("id", "foo"), agen("class", "bar"), | |||||
agennv("selected")], | |||||
self_closing=True, padding=" ") | self_closing=True, padding=" ") | ||||
gen1 = node1.__children__() | gen1 = node1.__children__() | ||||
@@ -89,6 +90,7 @@ class TestTag(TreeEqualityTestCase): | |||||
self.assertEqual(node3.attributes[0].value, next(gen3)) | self.assertEqual(node3.attributes[0].value, next(gen3)) | ||||
self.assertEqual(node3.attributes[1].name, next(gen3)) | self.assertEqual(node3.attributes[1].name, next(gen3)) | ||||
self.assertEqual(node3.attributes[1].value, next(gen3)) | self.assertEqual(node3.attributes[1].value, next(gen3)) | ||||
self.assertEqual(node3.attributes[2].name, next(gen3)) | |||||
self.assertEqual(node1.contents, next(gen1)) | self.assertEqual(node1.contents, next(gen1)) | ||||
self.assertEqual(node2.contents, next(gen2)) | self.assertEqual(node2.contents, next(gen2)) | ||||
self.assertEqual(node1.closing_tag, next(gen1)) | self.assertEqual(node1.closing_tag, next(gen1)) | ||||
@@ -113,7 +115,8 @@ class TestTag(TreeEqualityTestCase): | |||||
getter, marker = object(), object() | getter, marker = object(), object() | ||||
get = lambda code: output.append((getter, code)) | get = lambda code: output.append((getter, code)) | ||||
mark = lambda: output.append(marker) | mark = lambda: output.append(marker) | ||||
node1 = Tag(wraptext("ref"), wraptext("text"), [agen("name", "foo")]) | |||||
node1 = Tag(wraptext("ref"), wraptext("text"), | |||||
[agen("name", "foo"), agennv("selected")]) | |||||
node2 = Tag(wraptext("br"), self_closing=True, padding=" ") | node2 = Tag(wraptext("br"), self_closing=True, padding=" ") | ||||
node3 = Tag(wraptext("br"), self_closing=True, invalid=True, | node3 = Tag(wraptext("br"), self_closing=True, invalid=True, | ||||
implicit=True, padding=" ") | implicit=True, padding=" ") | ||||
@@ -122,9 +125,10 @@ class TestTag(TreeEqualityTestCase): | |||||
node3.__showtree__(output.append, get, mark) | node3.__showtree__(output.append, get, mark) | ||||
valid = [ | valid = [ | ||||
"<", (getter, node1.tag), (getter, node1.attributes[0].name), | "<", (getter, node1.tag), (getter, node1.attributes[0].name), | ||||
" = ", marker, (getter, node1.attributes[0].value), ">", | |||||
(getter, node1.contents), "</", (getter, node1.closing_tag), ">", | |||||
"<", (getter, node2.tag), "/>", "</", (getter, node3.tag), ">"] | |||||
" = ", marker, (getter, node1.attributes[0].value), | |||||
(getter, node1.attributes[1].name), ">", (getter, node1.contents), | |||||
"</", (getter, node1.closing_tag), ">", "<", (getter, node2.tag), | |||||
"/>", "</", (getter, node3.tag), ">"] | |||||
self.assertEqual(valid, output) | self.assertEqual(valid, output) | ||||
def test_tag(self): | def test_tag(self): | ||||
@@ -222,6 +226,38 @@ class TestTag(TreeEqualityTestCase): | |||||
self.assertWikicodeEqual(parsed, node.closing_tag) | self.assertWikicodeEqual(parsed, node.closing_tag) | ||||
self.assertEqual("<ref>foobar</ref {{ignore me}}>", node) | self.assertEqual("<ref>foobar</ref {{ignore me}}>", node) | ||||
def test_wiki_style_separator(self): | |||||
"""test getter/setter for wiki_style_separator attribute""" | |||||
node = Tag(wraptext("table"), wraptext("\n")) | |||||
self.assertIs(None, node.wiki_style_separator) | |||||
node.wiki_style_separator = "|" | |||||
self.assertEqual("|", node.wiki_style_separator) | |||||
node.wiki_markup = "{" | |||||
self.assertEqual("{|\n{", node) | |||||
node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|") | |||||
        self.assertEqual("|", node2.wiki_style_separator)
def test_closing_wiki_markup(self): | |||||
"""test getter/setter for closing_wiki_markup attribute""" | |||||
node = Tag(wraptext("table"), wraptext("\n")) | |||||
self.assertIs(None, node.closing_wiki_markup) | |||||
node.wiki_markup = "{|" | |||||
self.assertEqual("{|", node.closing_wiki_markup) | |||||
node.closing_wiki_markup = "|}" | |||||
self.assertEqual("|}", node.closing_wiki_markup) | |||||
self.assertEqual("{|\n|}", node) | |||||
node.wiki_markup = "!!" | |||||
self.assertEqual("|}", node.closing_wiki_markup) | |||||
self.assertEqual("!!\n|}", node) | |||||
node.wiki_markup = False | |||||
self.assertFalse(node.closing_wiki_markup) | |||||
self.assertEqual("<table>\n</table>", node) | |||||
node2 = Tag(wraptext("table"), wraptext("\n"), | |||||
attrs=[agen("id", "foo")], wiki_markup="{|", | |||||
closing_wiki_markup="|}") | |||||
self.assertEqual("|}", node2.closing_wiki_markup) | |||||
self.assertEqual('{| id="foo"\n|}', node2) | |||||
def test_has(self): | def test_has(self): | ||||
"""test Tag.has()""" | """test Tag.has()""" | ||||
node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")]) | node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")]) | ||||
@@ -272,28 +308,33 @@ class TestTag(TreeEqualityTestCase): | |||||
"""test Tag.add()""" | """test Tag.add()""" | ||||
node = Tag(wraptext("ref"), wraptext("cite")) | node = Tag(wraptext("ref"), wraptext("cite")) | ||||
node.add("name", "value") | node.add("name", "value") | ||||
node.add("name", "value", quoted=False) | |||||
node.add("name", "value", quotes=None) | |||||
node.add("name", "value", quotes="'") | |||||
node.add("name") | node.add("name") | ||||
node.add(1, False) | node.add(1, False) | ||||
node.add("style", "{{foobar}}") | node.add("style", "{{foobar}}") | ||||
node.add("name", "value", True, "\n", " ", " ") | |||||
node.add("name", "value", '"', "\n", " ", " ") | |||||
attr1 = ' name="value"' | attr1 = ' name="value"' | ||||
attr2 = " name=value" | attr2 = " name=value" | ||||
attr3 = " name" | |||||
attr4 = ' 1="False"' | |||||
attr5 = ' style="{{foobar}}"' | |||||
attr6 = '\nname = "value"' | |||||
attr3 = " name='value'" | |||||
attr4 = " name" | |||||
attr5 = ' 1="False"' | |||||
attr6 = ' style="{{foobar}}"' | |||||
attr7 = '\nname = "value"' | |||||
self.assertEqual(attr1, node.attributes[0]) | self.assertEqual(attr1, node.attributes[0]) | ||||
self.assertEqual(attr2, node.attributes[1]) | self.assertEqual(attr2, node.attributes[1]) | ||||
self.assertEqual(attr3, node.attributes[2]) | self.assertEqual(attr3, node.attributes[2]) | ||||
self.assertEqual(attr4, node.attributes[3]) | self.assertEqual(attr4, node.attributes[3]) | ||||
self.assertEqual(attr5, node.attributes[4]) | self.assertEqual(attr5, node.attributes[4]) | ||||
self.assertEqual(attr6, node.attributes[5]) | self.assertEqual(attr6, node.attributes[5]) | ||||
self.assertEqual(attr6, node.get("name")) | |||||
self.assertEqual(attr7, node.attributes[6]) | |||||
self.assertEqual(attr7, node.get("name")) | |||||
self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]), | self.assertWikicodeEqual(wrap([Template(wraptext("foobar"))]), | ||||
node.attributes[4].value) | |||||
node.attributes[5].value) | |||||
self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5, | self.assertEqual("".join(("<ref", attr1, attr2, attr3, attr4, attr5, | ||||
attr6, ">cite</ref>")), node) | |||||
attr6, attr7, ">cite</ref>")), node) | |||||
self.assertRaises(ValueError, node.add, "name", "foo", quotes="bar") | |||||
self.assertRaises(ValueError, node.add, "name", "a bc d", quotes=None) | |||||
def test_remove(self): | def test_remove(self): | ||||
"""test Tag.remove()""" | """test Tag.remove()""" | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -130,6 +130,8 @@ class TestTemplate(TreeEqualityTestCase): | |||||
self.assertTrue(node4.has("b", False)) | self.assertTrue(node4.has("b", False)) | ||||
self.assertTrue(node3.has("b", True)) | self.assertTrue(node3.has("b", True)) | ||||
self.assertFalse(node4.has("b", True)) | self.assertFalse(node4.has("b", True)) | ||||
self.assertFalse(node1.has_param("foobar", False)) | |||||
self.assertTrue(node2.has_param(1, False)) | |||||
def test_get(self): | def test_get(self): | ||||
"""test Template.get()""" | """test Template.get()""" | ||||
@@ -176,52 +178,41 @@ class TestTemplate(TreeEqualityTestCase): | |||||
pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | ||||
node16 = Template(wraptext("a"), [ | node16 = Template(wraptext("a"), [ | ||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | ||||
node17 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node18 = Template(wraptext("a\n"), [ | |||||
pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), | |||||
pgens("h ", " i\n")]) | |||||
node19 = Template(wraptext("a"), [ | |||||
pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | |||||
node20 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | |||||
node21 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node22 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node23 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
node17 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node18 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node19 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node20 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
pgenh("3", "d"), pgenh("4", "e")]) | pgenh("3", "d"), pgenh("4", "e")]) | ||||
node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
node21 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
pgens("4", "d"), pgens("5", "e")]) | pgens("4", "d"), pgens("5", "e")]) | ||||
node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
node22 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), | |||||
pgens("4", "d"), pgens("5", "e")]) | pgens("4", "d"), pgens("5", "e")]) | ||||
node23 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node24 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node25 = Template(wraptext("a"), [pgens("b", "c")]) | |||||
node26 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node27 = Template(wraptext("a"), [pgenh("1", "b")]) | node27 = Template(wraptext("a"), [pgenh("1", "b")]) | ||||
node28 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node29 = Template(wraptext("a"), [pgens("b", "c")]) | |||||
node30 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node31 = Template(wraptext("a"), [pgenh("1", "b")]) | |||||
node32 = Template(wraptext("a"), [pgens("1", "b")]) | |||||
node33 = Template(wraptext("a"), [ | |||||
node28 = Template(wraptext("a"), [pgens("1", "b")]) | |||||
node29 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) | ||||
node34 = Template(wraptext("a\n"), [ | |||||
node30 = Template(wraptext("a\n"), [ | |||||
pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), | pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), | ||||
pgens("h ", " i\n")]) | pgens("h ", " i\n")]) | ||||
node35 = Template(wraptext("a"), [ | |||||
node31 = Template(wraptext("a"), [ | |||||
pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) | ||||
node36 = Template(wraptext("a"), [ | |||||
node32 = Template(wraptext("a"), [ | |||||
pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) | pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) | ||||
node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
pgens("b", "f"), pgens("b", "h"), | |||||
pgens("i", "j")]) | |||||
node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
node33 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
pgens("b", "f"), pgens("b", "h"), | pgens("b", "f"), pgens("b", "h"), | ||||
pgens("i", "j")]) | pgens("i", "j")]) | ||||
node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), | |||||
node34 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), | |||||
pgens("1", "c"), pgens("2", "d")]) | pgens("1", "c"), pgens("2", "d")]) | ||||
node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), | |||||
node35 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), | |||||
pgenh("1", "c"), pgenh("2", "d")]) | pgenh("1", "c"), pgenh("2", "d")]) | ||||
node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
node36 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), | |||||
pgens("f", "g")]) | pgens("f", "g")]) | ||||
node41 = Template(wraptext("a"), [pgenh("1", "")]) | |||||
node37 = Template(wraptext("a"), [pgenh("1", "")]) | |||||
node38 = Template(wraptext("abc")) | |||||
node1.add("e", "f", showkey=True) | node1.add("e", "f", showkey=True) | ||||
node2.add(2, "g", showkey=False) | node2.add(2, "g", showkey=False) | ||||
@@ -241,31 +232,29 @@ class TestTemplate(TreeEqualityTestCase): | |||||
node14.add("j", "k", showkey=True) | node14.add("j", "k", showkey=True) | ||||
node15.add("h", "i", showkey=True) | node15.add("h", "i", showkey=True) | ||||
node16.add("h", "i", showkey=True, preserve_spacing=False) | node16.add("h", "i", showkey=True, preserve_spacing=False) | ||||
node17.add("h", "i", showkey=False) | |||||
node18.add("j", "k", showkey=False) | |||||
node19.add("h", "i", showkey=False) | |||||
node20.add("h", "i", showkey=False, preserve_spacing=False) | |||||
node21.add("2", "c") | |||||
node22.add("3", "c") | |||||
node23.add("c", "d") | |||||
node24.add("5", "f") | |||||
node25.add("3", "f") | |||||
node26.add("6", "f") | |||||
node27.add("c", "foo=bar") | |||||
node28.add("2", "foo=bar") | |||||
node29.add("b", "d") | |||||
node30.add("1", "foo=bar") | |||||
node31.add("1", "foo=bar", showkey=True) | |||||
node32.add("1", "foo=bar", showkey=False) | |||||
node33.add("d", "foo") | |||||
node34.add("f", "foo") | |||||
node35.add("f", "foo") | |||||
node36.add("d", "foo", preserve_spacing=False) | |||||
node37.add("b", "k") | |||||
node38.add("1", "e") | |||||
node39.add("1", "e") | |||||
node40.add("d", "h", before="b") | |||||
node41.add(1, "b") | |||||
node17.add("2", "c") | |||||
node18.add("3", "c") | |||||
node19.add("c", "d") | |||||
node20.add("5", "f") | |||||
node21.add("3", "f") | |||||
node22.add("6", "f") | |||||
node23.add("c", "foo=bar") | |||||
node24.add("2", "foo=bar") | |||||
node25.add("b", "d") | |||||
node26.add("1", "foo=bar") | |||||
node27.add("1", "foo=bar", showkey=True) | |||||
node28.add("1", "foo=bar", showkey=False) | |||||
node29.add("d", "foo") | |||||
node30.add("f", "foo") | |||||
node31.add("f", "foo") | |||||
node32.add("d", "foo", preserve_spacing=False) | |||||
node33.add("b", "k") | |||||
node34.add("1", "e") | |||||
node35.add("1", "e") | |||||
node36.add("d", "h", before="b") | |||||
node37.add(1, "b") | |||||
node38.add("1", "foo") | |||||
self.assertRaises(ValueError, node38.add, "z", "bar", showkey=False) | |||||
self.assertEqual("{{a|b=c|d|e=f}}", node1) | self.assertEqual("{{a|b=c|d|e=f}}", node1) | ||||
self.assertEqual("{{a|b=c|d|g}}", node2) | self.assertEqual("{{a|b=c|d|g}}", node2) | ||||
@@ -285,34 +274,31 @@ class TestTemplate(TreeEqualityTestCase): | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) | self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) | ||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) | self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) | ||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) | self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) | ||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) | |||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) | |||||
self.assertEqual("{{a|b|c}}", node21) | |||||
self.assertEqual("{{a|b|3=c}}", node22) | |||||
self.assertEqual("{{a|b|c=d}}", node23) | |||||
self.assertEqual("{{a|b|c|d|e|f}}", node24) | |||||
self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) | |||||
self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) | |||||
self.assertEqual("{{a|b|c=foo=bar}}", node27) | |||||
self.assertEqual("{{a|b|foo=bar}}", node28) | |||||
self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|b=d}}", node29) | |||||
self.assertEqual("{{a|foo=bar}}", node30) | |||||
self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|1=foo=bar}}", node31) | |||||
self.assertEqual("{{a|foo=bar}}", node32) | |||||
self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) | |||||
self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) | |||||
self.assertEqual("{{a|b=k|d=e|i=j}}", node37) | |||||
self.assertEqual("{{a|1=e|x=y|2=d}}", node38) | |||||
self.assertEqual("{{a|x=y|e|d}}", node39) | |||||
self.assertEqual("{{a|b=c|d=h|f=g}}", node40) | |||||
self.assertEqual("{{a|b}}", node41) | |||||
self.assertEqual("{{a|b|c}}", node17) | |||||
self.assertEqual("{{a|b|3=c}}", node18) | |||||
self.assertEqual("{{a|b|c=d}}", node19) | |||||
self.assertEqual("{{a|b|c|d|e|f}}", node20) | |||||
self.assertEqual("{{a|b|c|4=d|5=e|f}}", node21) | |||||
self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node22) | |||||
self.assertEqual("{{a|b|c=foo=bar}}", node23) | |||||
self.assertEqual("{{a|b|foo=bar}}", node24) | |||||
self.assertIsInstance(node24.params[1].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|b=d}}", node25) | |||||
self.assertEqual("{{a|foo=bar}}", node26) | |||||
self.assertIsInstance(node26.params[0].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|1=foo=bar}}", node27) | |||||
self.assertEqual("{{a|foo=bar}}", node28) | |||||
self.assertIsInstance(node28.params[0].value.get(1), HTMLEntity) | |||||
self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node29) | |||||
self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node30) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node31) | |||||
self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node32) | |||||
self.assertEqual("{{a|b=k|d=e|i=j}}", node33) | |||||
self.assertEqual("{{a|1=e|x=y|2=d}}", node34) | |||||
self.assertEqual("{{a|x=y|e|d}}", node35) | |||||
self.assertEqual("{{a|b=c|d=h|f=g}}", node36) | |||||
self.assertEqual("{{a|b}}", node37) | |||||
self.assertEqual("{{abc|foo}}", node38) | |||||
def test_remove(self): | def test_remove(self): | ||||
"""test Template.remove()""" | """test Template.remove()""" | ||||
@@ -373,6 +359,8 @@ class TestTemplate(TreeEqualityTestCase): | |||||
node26 = Template(wraptext("foo"), [ | node26 = Template(wraptext("foo"), [ | ||||
pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), | pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), | ||||
pgens("a", "b")]) | pgens("a", "b")]) | ||||
node27 = Template(wraptext("foo"), [pgenh("1", "bar")]) | |||||
node28 = Template(wraptext("foo"), [pgenh("1", "bar")]) | |||||
node2.remove("1") | node2.remove("1") | ||||
node2.remove("abc") | node2.remove("abc") | ||||
@@ -430,6 +418,7 @@ class TestTemplate(TreeEqualityTestCase): | |||||
self.assertEqual("{{foo|a=|c=d|e=f|a=b|a=b}}", node24) | self.assertEqual("{{foo|a=|c=d|e=f|a=b|a=b}}", node24) | ||||
self.assertEqual("{{foo|a=b|c=d|e=f|a=b}}", node25) | self.assertEqual("{{foo|a=b|c=d|e=f|a=b}}", node25) | ||||
self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) | self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) | ||||
self.assertRaises(ValueError, node27.remove, node28.get(1)) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -188,6 +188,13 @@ class TestWikicode(TreeEqualityTestCase): | |||||
func("is {{some", "cd", recursive=True) | func("is {{some", "cd", recursive=True) | ||||
self.assertEqual(expected[5], code6) | self.assertEqual(expected[5], code6) | ||||
code7 = parse("{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}") | |||||
func = partial(meth, code7) | |||||
obj = wrap([code7.get(0), code7.get(2)]) | |||||
self.assertRaises(ValueError, func, obj, "{{lol}}") | |||||
func("{{foo}}{{baz}}", "{{lol}}") | |||||
self.assertEqual(expected[6], code7) | |||||
def test_insert_before(self): | def test_insert_before(self): | ||||
"""test Wikicode.insert_before()""" | """test Wikicode.insert_before()""" | ||||
meth = lambda code, *args, **kw: code.insert_before(*args, **kw) | meth = lambda code, *args, **kw: code.insert_before(*args, **kw) | ||||
@@ -197,7 +204,8 @@ class TestWikicode(TreeEqualityTestCase): | |||||
"{{a|x{{b}}|{{c|d=y{{f}}}}}}", | "{{a|x{{b}}|{{c|d=y{{f}}}}}}", | ||||
"{{a}}w{{b}}{{c}}x{{d}}{{e}}{{f}}{{g}}{{h}}yz{{i}}{{j}}", | "{{a}}w{{b}}{{c}}x{{d}}{{e}}{{f}}{{g}}{{h}}yz{{i}}{{j}}", | ||||
"{{a|x{{b}}{{c}}|{{f|{{g}}=y{{h}}{{i}}}}}}", | "{{a|x{{b}}{{c}}|{{f|{{g}}=y{{h}}{{i}}}}}}", | ||||
"here cdis {{some abtext and a {{template}}}}"] | |||||
"here cdis {{some abtext and a {{template}}}}", | |||||
"{{foo}}{{bar}}{{baz}}{{lol}}{{foo}}{{baz}}"] | |||||
self._test_search(meth, expected) | self._test_search(meth, expected) | ||||
def test_insert_after(self): | def test_insert_after(self): | ||||
@@ -209,7 +217,8 @@ class TestWikicode(TreeEqualityTestCase): | |||||
"{{a|{{b}}x|{{c|d={{f}}y}}}}", | "{{a|{{b}}x|{{c|d={{f}}y}}}}", | ||||
"{{a}}{{b}}{{c}}w{{d}}{{e}}x{{f}}{{g}}{{h}}{{i}}{{j}}yz", | "{{a}}{{b}}{{c}}w{{d}}{{e}}x{{f}}{{g}}{{h}}{{i}}{{j}}yz", | ||||
"{{a|{{b}}{{c}}x|{{f|{{g}}={{h}}{{i}}y}}}}", | "{{a|{{b}}{{c}}x|{{f|{{g}}={{h}}{{i}}y}}}}", | ||||
"here is {{somecd text andab a {{template}}}}"] | |||||
"here is {{somecd text andab a {{template}}}}", | |||||
"{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}{{lol}}"] | |||||
self._test_search(meth, expected) | self._test_search(meth, expected) | ||||
def test_replace(self): | def test_replace(self): | ||||
@@ -218,7 +227,7 @@ class TestWikicode(TreeEqualityTestCase): | |||||
expected = [ | expected = [ | ||||
"{{a}}xz[[y]]{{e}}", "dcdffe", "{{a|x|{{c|d=y}}}}", | "{{a}}xz[[y]]{{e}}", "dcdffe", "{{a|x|{{c|d=y}}}}", | ||||
"{{a}}wx{{f}}{{g}}z", "{{a|x|{{f|{{g}}=y}}}}", | "{{a}}wx{{f}}{{g}}z", "{{a|x|{{f|{{g}}=y}}}}", | ||||
"here cd ab a {{template}}}}"] | |||||
"here cd ab a {{template}}}}", "{{foo}}{{bar}}{{baz}}{{lol}}"] | |||||
self._test_search(meth, expected) | self._test_search(meth, expected) | ||||
def test_append(self): | def test_append(self): | ||||
@@ -238,8 +247,8 @@ class TestWikicode(TreeEqualityTestCase): | |||||
meth = lambda code, obj, value, **kw: code.remove(obj, **kw) | meth = lambda code, obj, value, **kw: code.remove(obj, **kw) | ||||
expected = [ | expected = [ | ||||
"{{a}}{{c}}", "", "{{a||{{c|d=}}}}", "{{a}}{{f}}", | "{{a}}{{c}}", "", "{{a||{{c|d=}}}}", "{{a}}{{f}}", | ||||
"{{a||{{f|{{g}}=}}}}", "here a {{template}}}}" | |||||
] | |||||
"{{a||{{f|{{g}}=}}}}", "here a {{template}}}}", | |||||
"{{foo}}{{bar}}{{baz}}"] | |||||
self._test_search(meth, expected) | self._test_search(meth, expected) | ||||
def test_matches(self): | def test_matches(self): | ||||
@@ -319,11 +328,14 @@ class TestWikicode(TreeEqualityTestCase): | |||||
self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) | self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) | ||||
self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) | self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) | ||||
self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], | |||||
code2.filter_templates(recursive=False)) | |||||
self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", | |||||
"{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], | |||||
code2.filter_templates(recursive=True)) | |||||
exp_rec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", | |||||
"{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"] | |||||
exp_unrec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] | |||||
self.assertEqual(exp_rec, code2.filter_templates()) | |||||
self.assertEqual(exp_unrec, code2.filter_templates(recursive=False)) | |||||
self.assertEqual(exp_rec, code2.filter_templates(recursive=True)) | |||||
self.assertEqual(exp_rec, code2.filter_templates(True)) | |||||
self.assertEqual(exp_unrec, code2.filter_templates(False)) | |||||
self.assertEqual(["{{foobar}}"], code3.filter_templates( | self.assertEqual(["{{foobar}}"], code3.filter_templates( | ||||
matches=lambda node: node.name.matches("Foobar"))) | matches=lambda node: node.name.matches("Foobar"))) | ||||
@@ -332,9 +344,15 @@ class TestWikicode(TreeEqualityTestCase): | |||||
self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) | self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) | ||||
self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) | self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) | ||||
self.assertRaises(TypeError, code.filter_templates, 100) | |||||
self.assertRaises(TypeError, code.filter_templates, a=42) | self.assertRaises(TypeError, code.filter_templates, a=42) | ||||
self.assertRaises(TypeError, code.filter_templates, forcetype=Template) | self.assertRaises(TypeError, code.filter_templates, forcetype=Template) | ||||
self.assertRaises(TypeError, code.filter_templates, 1, 0, 0, Template) | |||||
code4 = parse("{{foo}}<b>{{foo|{{bar}}}}</b>") | |||||
actual1 = code4.filter_templates(recursive=code4.RECURSE_OTHERS) | |||||
actual2 = code4.filter_templates(code4.RECURSE_OTHERS) | |||||
self.assertEqual(["{{foo}}", "{{foo|{{bar}}}}"], actual1) | |||||
self.assertEqual(["{{foo}}", "{{foo|{{bar}}}}"], actual2) | |||||
def test_get_sections(self): | def test_get_sections(self): | ||||
"""test Wikicode.get_sections()""" | """test Wikicode.get_sections()""" | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -37,3 +37,10 @@ name: incomplete_partial_close | |||||
label: a comment that doesn't close, with a partial close | label: a comment that doesn't close, with a partial close | ||||
input: "<!-- foo --\x01>" | input: "<!-- foo --\x01>" | ||||
output: [Text(text="<!-- foo --\x01>")] | output: [Text(text="<!-- foo --\x01>")] | ||||
--- | |||||
name: incomplete_part_only | |||||
label: a comment that only has a < and ! | |||||
input: "<!foo" | |||||
output: [Text(text="<!foo")] |
@@ -124,8 +124,8 @@ output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com."), Exte | |||||
--- | --- | ||||
name: brackets_colons_after | |||||
label: colons after a bracket-enclosed link that are included | |||||
name: brackets_punct_after | |||||
label: punctuation after a bracket-enclosed link that is included
input: "[http://example.com/foo:bar.:;baz!?, Example]" | input: "[http://example.com/foo:bar.:;baz!?, Example]" | ||||
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo:bar.:;baz!?,"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()] | output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo:bar.:;baz!?,"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose()] | ||||
@@ -43,7 +43,7 @@ output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(t | |||||
name: rich_tags | name: rich_tags | ||||
label: a HTML tag with tons of other things in it | label: a HTML tag with tons of other things in it | ||||
input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>" | input: "{{dubious claim}}<ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} \n mno = "{{p}} [[q]] {{r}}">[[Source]]</ref>" | ||||
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
output: [TemplateOpen(), Text(text="dubious claim"), TemplateClose(), TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TemplateOpen(), Text(text="abc"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar "), TemplateOpen(), Text(text="baz"), TemplateClose(), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="abc"), TagAttrEquals(), TemplateOpen(), Text(text="de"), TemplateClose(), Text(text="f"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="ghi"), TagAttrEquals(), Text(text="j"), TemplateOpen(), Text(text="k"), TemplateClose(), TemplateOpen(), Text(text="l"), TemplateClose(), TagAttrStart(pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" "), Text(text="mno"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="p"), TemplateClose(), Text(text=" "), WikilinkOpen(), Text(text="q"), WikilinkClose(), Text(text=" "), TemplateOpen(), Text(text="r"), TemplateClose(), TagCloseOpen(padding=""), WikilinkOpen(), Text(text="Source"), WikilinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | --- | ||||
@@ -178,3 +178,66 @@ name: external_link_inside_wikilink_title | |||||
label: an external link inside a wikilink title, which is invalid | label: an external link inside a wikilink title, which is invalid | ||||
input: "[[File:Example.png http://example.com]]" | input: "[[File:Example.png http://example.com]]" | ||||
output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] | output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] | ||||
--- | |||||
name: italics_inside_external_link_inside_incomplete_list | |||||
label: italic text inside an external link inside an incomplete list | |||||
input: "<li>[http://www.example.com ''example'']" | |||||
output: [TagOpenOpen(), Text(text="li"), TagCloseSelfclose(padding="", implicit=True), ExternalLinkOpen(brackets=True), Text(text="http://www.example.com"), ExternalLinkSeparator(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="example"), TagOpenClose(), Text(text="i"), TagCloseClose(), ExternalLinkClose()] | |||||
--- | |||||
name: nodes_inside_external_link_after_punct | |||||
label: various complex nodes inside an external link following punctuation | |||||
input: "http://example.com/foo.{{bar}}baz.&biz;<!--hello-->bingo" | |||||
output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo."), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz.&biz;"), CommentStart(), Text(text="hello"), CommentEnd(), Text(text="bingo"), ExternalLinkClose()] | |||||
--- | |||||
name: newline_and_comment_in_template_name | |||||
label: a template name containing a newline followed by a comment | |||||
input: "{{foobar\n<!-- comment -->}}" | |||||
output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateClose()] | |||||
--- | |||||
name: newline_and_comment_in_template_name_2 | |||||
label: a template name containing a newline followed by a comment | |||||
input: "{{foobar\n<!-- comment -->|key=value}}" | |||||
output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), TemplateParamSeparator(), Text(text="key"), TemplateParamEquals(), Text(text="value"), TemplateClose()] | |||||
--- | |||||
name: newline_and_comment_in_template_name_3 | |||||
label: a template name containing a newline followed by a comment | |||||
input: "{{foobar\n<!-- comment -->\n|key=value}}" | |||||
output: [TemplateOpen(), Text(text="foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\n"), TemplateParamSeparator(), Text(text="key"), TemplateParamEquals(), Text(text="value"), TemplateClose()] | |||||
--- | |||||
name: newline_and_comment_in_template_name_4 | |||||
label: a template name containing a newline followed by a comment | |||||
input: "{{foobar\n<!-- comment -->invalid|key=value}}" | |||||
output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="invalid|key=value}}")] | |||||
--- | |||||
name: newline_and_comment_in_template_name_5 | |||||
label: a template name containing a newline followed by a comment | |||||
input: "{{foobar\n<!-- comment -->\ninvalid|key=value}}" | |||||
output: [Text(text="{{foobar\n"), CommentStart(), Text(text=" comment "), CommentEnd(), Text(text="\ninvalid|key=value}}")] | |||||
--- | |||||
name: newline_and_comment_in_template_name_6 | |||||
label: a template name containing a newline followed by an unclosed comment
input: "{{foobar\n<!--|key=value}}" | |||||
output: [Text(text="{{foobar\n<!--|key=value}}")] | |||||
--- | |||||
name: newline_and_comment_in_template_name_7 | |||||
label: a template name containing a newline followed by an incomplete comment start
input: "{{foobar\n<!|key=value}}" | |||||
output: [Text(text="{{foobar\n<!|key=value}}")] |
@@ -0,0 +1,410 @@ | |||||
name: empty_table | |||||
label: parsing an empty table | |||||
input: "{|\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: inline_table | |||||
label: tables with a close on the same line are not valid | |||||
input: "{||}" | |||||
output: [Text(text="{||}")] | |||||
--- | |||||
name: no_table_close_simple | |||||
label: no table close on inline table | |||||
input: "{| " | |||||
output: [Text(text="{| ")] | |||||
--- | |||||
name: no_table_close_newline | |||||
label: no table close with a newline | |||||
input: "{| \n " | |||||
output: [Text(text="{| \n ")] | |||||
--- | |||||
name: no_table_close_inside_cell | |||||
label: no table close while inside of a cell | |||||
input: "{| \n| " | |||||
output: [Text(text="{| \n| ")] | |||||
--- | |||||
name: no_table_close_inside_cell_after_newline | |||||
label: no table close while inside of a cell after a newline | |||||
input: "{| \n| \n " | |||||
output: [Text(text="{| \n| \n ")] | |||||
--- | |||||
name: no_table_close_inside_cell_with_attributes | |||||
label: no table close while inside of a cell with attributes | |||||
input: "{| \n| red | test" | |||||
output: [Text(text="{| \n| red | test")] | |||||
--- | |||||
name: no_table_close_inside_row | |||||
label: no table close while inside of a row | |||||
input: "{| \n|- " | |||||
output: [Text(text="{| \n|- ")] | |||||
--- | |||||
name: no_table_close_inside_row_after_newline | |||||
label: no table close while inside of a row after a newline | |||||
input: "{| \n|- \n " | |||||
output: [Text(text="{| \n|- \n ")] | |||||
--- | |||||
name: no_table_close_row_and_cell | |||||
label: no table close while inside a cell inside a row | |||||
input: "{| \n|- \n|" | |||||
output: [Text(text="{| \n|- \n|")] | |||||
--- | |||||
name: no_table_close_attributes | |||||
label: don't parse attributes as attributes if the table doesn't exist | |||||
input: "{| border="1"" | |||||
output: [Text(text="{| border=\"1\"")] | |||||
--- | |||||
name: no_table_close_unclosed_attributes | |||||
label: don't parse unclosed attributes if the table doesn't exist | |||||
input: "{| border=" | |||||
output: [Text(text="{| border=")] | |||||
--- | |||||
name: no_table_close_row_attributes | |||||
label: don't parse row attributes as attributes if the table doesn't exist | |||||
input: "{| |- border="1"" | |||||
output: [Text(text="{| |- border=\"1\"")] | |||||
--- | |||||
name: no_table_close_cell | |||||
label: don't parse cells if the table doesn't close | |||||
input: "{| | border="1"| test || red | foo" | |||||
output: [Text(text="{| | border=\"1\"| test || red | foo")] | |||||
--- | |||||
name: crazy_no_table_close | |||||
label: lots of opened wiki syntax without closes | |||||
input: "{{{ {{ {| <ref" | |||||
output: [Text(text="{{{ {{ {| <ref")] | |||||
--- | |||||
name: leading_whitespace_table | |||||
label: handle leading whitespace for a table | |||||
input: "foo \n \t {|\n|}" | |||||
output: [Text(text="foo \n \t "), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: whitespace_after_table | |||||
label: handle whitespace after a table close | |||||
input: "{|\n|}\n \t " | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text="\n \t ")] | |||||
--- | |||||
name: different_whitespace_after_table | |||||
label: handle spaces after a table close | |||||
input: "{|\n|} \n " | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n ")] | |||||
--- | |||||
name: characters_after_table | |||||
label: handle characters after a table close | |||||
input: "{|\n|} tsta" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" tsta")] | |||||
--- | |||||
name: characters_after_inline_table | |||||
label: handle characters after an inline table close | |||||
input: "{| |} tsta" | |||||
output: [Text(text="{| |} tsta")] | |||||
--- | |||||
name: leading_characters_table | |||||
label: don't parse as a table when leading characters are not newline or whitespace | |||||
input: "foo \n foo \t {|\n|}" | |||||
output: [Text(text="foo \n foo \t {|\n|}")] | |||||
--- | |||||
name: table_row_simple | |||||
label: simple table row | |||||
input: "{|\n |- \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_row_multiple | |||||
label: multiple table rows
input: "{|\n |- \n|- \n |-\n |}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_simple | |||||
label: simple table cell | |||||
input: "{|\n | foo \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_inline | |||||
label: multiple inline table cells | |||||
input: "{|\n | foo || bar || test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_multiple | |||||
label: multiple table cells (non-inline) | |||||
input: "{|\n| foo \n| bar \n| test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_header_simple | |||||
label: simple header cell | |||||
input: "{|\n ! foo \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_header_inline | |||||
label: multiple inline header cells | |||||
input: "{|\n ! foo || bar !! test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_header_multiple | |||||
label: multiple table header cells (non-inline) | |||||
input: "{|\n! foo \n! bar \n! test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagCloseOpen(padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: nested_cells_and_rows | |||||
label: combination of cells and rows in a table | |||||
input: "{|\n|- \n| foo \n|- \n| bar\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagCloseOpen(padding=" \n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" bar\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_fake_close | |||||
label: looks like a table close but is not | |||||
input: "{|\n | |} \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text="} \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_more_fake_close | |||||
label: looks like a table close but is not | |||||
input: "{|\n || |} \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" |} \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_extra_close | |||||
label: process second close as text | |||||
input: "{| \n |} \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" \n|}")] | |||||
--- | |||||
name: nowiki_inside_table | |||||
label: nowiki handles pipe characters in tables | |||||
input: "{|\n | foo <nowiki>| |- {| |} || ! !!</nowiki> bar \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo "), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="| |- {| |} || ! !!"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), Text(text=" bar \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_text_outside_cell | |||||
label: parse text inside table but outside of a cell | |||||
input: "{|\n bar \n | foo \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar \n "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text=" foo \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: no_table_cell_with_leading_characters | |||||
label: fail to create a table cell when there are leading non-whitespace characters | |||||
input: "{|\n bar | foo \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar | foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: no_table_row_with_leading_characters | |||||
label: fail to create a table row when there are leading non-whitespace characters | |||||
input: "{|\n bar |- foo \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" bar |- foo \n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: template_inside_table_cell | |||||
label: template within table cell | |||||
input: "{|\n |{{foo\n|bar=baz}} \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), Text(text=" \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_attributes | |||||
label: parse table cell style attributes | |||||
input: "{| \n | name="foo bar"| test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_empty_attributes | |||||
label: parse table cell with style markers but no attributes | |||||
input: "{| \n | | test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_with_dash | |||||
label: parse a situation in which a cell line looks like a row line | |||||
input: "{|\n ||- \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="- \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_attributes_quote_with_pipe | |||||
label: pipe inside an attribute quote should still be used as a style separator | |||||
input: "{| \n | name="foo|bar"| test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_attributes_name_with_pipe | |||||
label: pipe inside an attribute name should still be used as a style separator | |||||
input: "{| \n | name|="foo bar" | test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagCloseOpen(wiki_markup="|", padding=""), Text(text="=\"foo bar\" | test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_attributes_pipe_after_equals | |||||
label: pipe inside an attribute should still be used as a style separator after an equals | |||||
input: "{| \n | name=|"foo|bar"| test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagCloseOpen(wiki_markup="|", padding=""), Text(text="\"foo|bar\"| test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_cell_attributes_templates | |||||
label: pipe inside attributes shouldn't be style separator | |||||
input: "{| \n | {{comment|template=baz}} | test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=" "), TemplateOpen(), Text(text="comment"), TemplateParamSeparator(), Text(text="template"), TemplateParamEquals(), Text(text="baz"), TemplateClose(), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: header_cell_attributes | |||||
label: parse header cell style attributes | |||||
input: "{| \n ! name="foo bar"| test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: inline_cell_attributes | |||||
label: parse cell style attributes of inline cells | |||||
input: "{| \n ! name="foo bar" | test ||color="red"| markup!!foo | time \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="!"), Text(text="th"), TagAttrStart(pad_after_eq="", pad_first=" ", pad_before_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test "), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="||"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="color"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="red"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" markup"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenOpen(wiki_markup="!!"), Text(text="th"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), Text(text="foo"), TagCloseOpen(wiki_markup="|", padding=""), Text(text=" time \n"), TagOpenClose(wiki_markup=""), Text(text="th"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_row_attributes | |||||
label: parse table row style attributes | |||||
input: "{| \n |- name="foo bar"\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_row_attributes_crazy_whitespace | |||||
label: parse table row style attributes with different whitespace | |||||
input: "{| \t \n |- \t name="foo bar" \t \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding=" \t \n"), Text(text=" "), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" \t ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=" \t \n"), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: table_attributes | |||||
label: parse table style attributes | |||||
input: "{| name="foo bar"\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: inline_table_attributes | |||||
label: handle attributes in inline tables | |||||
input: "{| foo="tee bar" |}" | |||||
output: [Text(text='{| foo="tee bar" |}')] | |||||
--- | |||||
name: table_incorrect_attributes | |||||
label: parse incorrect table style attributes | |||||
input: "{| name="foo\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), Text(text="\"foo"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: templates_in_table_attribute | |||||
label: templates in the attributes of a table, after the start | |||||
input: "{| {{class}}="{{wikitable}}"\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TemplateOpen(), Text(text="class"), TemplateClose(), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="wikitable"), TemplateClose(), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: templates_in_table_attribute_2 | |||||
label: templates in the attributes of a table, after the start | |||||
input: "{|{{foo}} \n | name="foo bar" | test \n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first="", pad_before_eq=" ", pad_after_eq=""), TemplateOpen(), Text(text="foo"), TemplateClose(), TagCloseOpen(padding="\n"), Text(text=" "), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(wiki_markup="|", padding=" "), Text(text=" test \n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: inappropriate_marker_at_line_start | |||||
label: an inappropriate marker (a right bracket) at the start of a line in the table | |||||
input: "{|\n}" | |||||
output: [Text(text="{|\n}")] | |||||
--- | |||||
name: fake_close_near_start | |||||
label: a fake closing token at the end of the first line in the table | |||||
input: "{| class="wikitable" style="text-align: center; width=100%;|}\n|\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), Text(text="\"text-align:"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="center;"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="width"), TagAttrEquals(), Text(text="100%;|}"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: fake_close_near_start_2 | |||||
label: a fake closing token at the end of the first line in the table | |||||
input: "{| class="wikitable|}"\n|\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable|}"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: junk_after_table_start | |||||
label: ignore more junk on the first line of the table | |||||
input: "{| class="wikitable" | foobar\n|\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="class"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="wikitable"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="foobar"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||||
--- | |||||
name: junk_after_table_row | |||||
label: ignore junk on the first line of a table row | |||||
input: "{|\n|- foo="bar" | baz\n|blerp\n|}" | |||||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="bar"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="baz"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="blerp\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] |
@@ -57,7 +57,14 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before | |||||
name: attribute_quoted | name: attribute_quoted | ||||
label: a tag with a single quoted attribute | label: a tag with a single quoted attribute | ||||
input: "<ref name="foo bar"></ref>" | input: "<ref name="foo bar"></ref>" | ||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | |||||
name: attribute_single_quoted | |||||
label: a tag with a single singly-quoted attribute | |||||
input: "<ref name='foo bar'></ref>" | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="foo bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | --- | ||||
@@ -71,7 +78,7 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before | |||||
name: attribute_quoted_hyphen | name: attribute_quoted_hyphen | ||||
label: a tag with a single quoted attribute, containing a hyphen | label: a tag with a single quoted attribute, containing a hyphen | ||||
input: "<ref name="foo-bar"></ref>" | input: "<ref name="foo-bar"></ref>" | ||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo-bar"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | --- | ||||
@@ -92,21 +99,21 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before | |||||
name: attribute_selfclosing_value_quoted | name: attribute_selfclosing_value_quoted | ||||
label: a self-closing tag with a single quoted attribute | label: a self-closing tag with a single quoted attribute | ||||
input: "<ref name="foo"/>" | input: "<ref name="foo"/>" | ||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="foo"), TagCloseSelfclose(padding="")] | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="foo"), TagCloseSelfclose(padding="")] | |||||
--- | --- | ||||
name: nested_tag | name: nested_tag | ||||
label: a tag nested within the attributes of another | label: a tag nested within the attributes of another | ||||
input: "<ref name=<span style="color: red;">foo</span>>citation</ref>" | input: "<ref name=<span style="color: red;">foo</span>>citation</ref>" | ||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | --- | ||||
name: nested_tag_quoted | name: nested_tag_quoted | ||||
label: a tag nested within the attributes of another, quoted | label: a tag nested within the attributes of another, quoted | ||||
input: "<ref name="<span style="color: red;">foo</span>">citation</ref>" | input: "<ref name="<span style="color: red;">foo</span>">citation</ref>" | ||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="style"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="color: red;"), TagCloseOpen(padding=""), Text(text="foo"), TagOpenClose(), Text(text="span"), TagCloseClose(), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | --- | ||||
@@ -120,7 +127,14 @@ output: [Text(text="<ref name=</ ><//>>citation</ref>")] | |||||
name: nested_troll_tag_quoted | name: nested_troll_tag_quoted | ||||
label: a bogus tag that appears to be nested within the attributes of another, quoted | label: a bogus tag that appears to be nested within the attributes of another, quoted | ||||
input: "<ref name="</ ><//>">citation</ref>" | input: "<ref name="</ ><//>">citation</ref>" | ||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="name"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="</ ><//>"), TagCloseOpen(padding=""), Text(text="citation"), TagOpenClose(), Text(text="ref"), TagCloseClose()] | |||||
--- | |||||
name: nested_tag_selfclosing | |||||
label: a tag nested within the attributes of another; outer tag implicitly self-closing | |||||
input: "<li <b></b></li>" | |||||
output: [TagOpenOpen(), Text(text="li"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TagOpenOpen(), Text(text="b"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="</li"), TagCloseSelfclose(padding="", implicit=True)] | |||||
--- | --- | ||||
@@ -215,6 +229,27 @@ output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_befor | |||||
--- | --- | ||||
name: quotes_in_quotes | |||||
label: singly-quoted text inside a doubly-quoted attribute | |||||
input: "<span foo="bar 'baz buzz' biz">stuff</span>" | |||||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="bar 'baz buzz' biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||||
--- | |||||
name: quotes_in_quotes_2 | |||||
label: doubly-quoted text inside a singly-quoted attribute | |||||
input: "<span foo='bar "baz buzz" biz'>stuff</span>" | |||||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||||
--- | |||||
name: quotes_in_quotes_3 | |||||
label: doubly-quoted text inside a singly-quoted attribute, with backslashes | |||||
input: "<span foo='bar "baz buzz\\" biz'>stuff</span>" | |||||
output: [TagOpenOpen(), Text(text="span"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char="'"), Text(text="bar \"baz buzz\\\" biz"), TagCloseOpen(padding=""), Text(text="stuff"), TagOpenClose(), Text(text="span"), TagCloseClose()] | |||||
--- | |||||
name: incomplete_lbracket | name: incomplete_lbracket | ||||
label: incomplete tags: just a left bracket | label: incomplete tags: just a left bracket | ||||
input: "<" | input: "<" | ||||
@@ -400,28 +435,28 @@ output: [Text(text="junk <></>")] | |||||
name: backslash_premature_before | name: backslash_premature_before | ||||
label: a backslash before a quote before a space | label: a backslash before a quote before a space | ||||
input: "<foo attribute="this is\\" quoted">blah</foo>" | input: "<foo attribute="this is\\" quoted">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\" quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
name: backslash_premature_after | name: backslash_premature_after | ||||
label: a backslash before a quote after a space | label: a backslash before a quote after a space | ||||
input: "<foo attribute="this is \\"quoted">blah</foo>" | input: "<foo attribute="this is \\"quoted">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is \\\"quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
name: backslash_premature_middle | name: backslash_premature_middle | ||||
label: a backslash before a quote in the middle of a word | label: a backslash before a quote in the middle of a word | ||||
input: "<foo attribute="this i\\"s quoted">blah</foo>" | input: "<foo attribute="this i\\"s quoted">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this i\\\"s quoted"), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
name: backslash_adjacent | name: backslash_adjacent | ||||
label: escaped quotes next to unescaped quotes | label: escaped quotes next to unescaped quotes | ||||
input: "<foo attribute="\\"this is quoted\\"">blah</foo>" | input: "<foo attribute="\\"this is quoted\\"">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\\"this is quoted\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
@@ -435,21 +470,21 @@ output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before | |||||
name: backslash_double | name: backslash_double | ||||
label: two adjacent backslashes, which do *not* affect the quote | label: two adjacent backslashes, which do *not* affect the quote | ||||
input: "<foo attribute="this is\\\\" quoted">blah</foo>" | input: "<foo attribute="this is\\\\" quoted">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
name: backslash_triple | name: backslash_triple | ||||
label: three adjacent backslashes, which do *not* affect the quote | label: three adjacent backslashes, which do *not* affect the quote | ||||
input: "<foo attribute="this is\\\\\\" quoted">blah</foo>" | input: "<foo attribute="this is\\\\\\" quoted">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="this is\\\\\\"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="quoted\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
name: backslash_unaffecting | name: backslash_unaffecting | ||||
label: backslashes near quotes, but not immediately adjacent, thus having no effect | label: backslashes near quotes, but not immediately adjacent, thus having no effect | ||||
input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>" | input: "<foo attribute="\\quote\\d" also="quote\\d\\">blah</foo>" | ||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
output: [TagOpenOpen(), Text(text="foo"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attribute"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="\\quote\\d"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="also"), TagAttrEquals(), Text(text="\"quote\\d\\\""), TagCloseOpen(padding=""), Text(text="blah"), TagOpenClose(), Text(text="foo"), TagCloseClose()] | |||||
--- | --- | ||||
@@ -470,7 +505,7 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(t | |||||
name: unparsable_attributed | name: unparsable_attributed | ||||
label: a tag that should not be put through the normal parser; parsed attributes | label: a tag that should not be put through the normal parser; parsed attributes | ||||
input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}" | input: "{{t1}}<nowiki attr=val attr2="{{val2}}">{{t2}}</nowiki>{{t3}}" | ||||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr"), TagAttrEquals(), Text(text="val"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="attr2"), TagAttrEquals(), TagAttrQuote(char="\""), TemplateOpen(), Text(text="val2"), TemplateClose(), TagCloseOpen(padding=""), Text(text="{{t2}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t3"), TemplateClose()] | |||||
--- | --- | ||||
@@ -568,7 +603,7 @@ output: [Text(text="foo"), TagOpenOpen(invalid=True), Text(text="br"), TagCloseS | |||||
name: single_only_close_attribute | name: single_only_close_attribute | ||||
label: a tag that can only be single; presented as a close tag with an attribute | label: a tag that can only be single; presented as a close tag with an attribute | ||||
input: "</br id="break">" | input: "</br id="break">" | ||||
output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] | |||||
output: [TagOpenOpen(invalid=True), Text(text="br"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=""), Text(text="id"), TagAttrEquals(), TagAttrQuote(char="\""), Text(text="break"), TagCloseSelfclose(padding="", implicit=True)] | |||||
--- | --- | ||||
@@ -576,3 +611,24 @@ name: capitalization | |||||
label: caps should be ignored within tag names | label: caps should be ignored within tag names | ||||
input: "<NoWiKi>{{test}}</nOwIkI>" | input: "<NoWiKi>{{test}}</nOwIkI>" | ||||
output: [TagOpenOpen(), Text(text="NoWiKi"), TagCloseOpen(padding=""), Text(text="{{test}}"), TagOpenClose(), Text(text="nOwIkI"), TagCloseClose()] | output: [TagOpenOpen(), Text(text="NoWiKi"), TagCloseOpen(padding=""), Text(text="{{test}}"), TagOpenClose(), Text(text="nOwIkI"), TagCloseClose()] | ||||
--- | |||||
name: unparsable_incomplete_close | |||||
label: an unparsable tag with an incomplete close afterwards | |||||
input: "<nowiki>foo</nowiki" | |||||
output: [Text(text="<nowiki>foo</nowiki")] | |||||
--- | |||||
name: unparsable_with_intermediates | |||||
label: an unparsable tag with intermediate tags inside of it | |||||
input: "<nowiki><ref></ref></nowiki>" | |||||
output: [TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="<ref></ref>"), TagOpenClose(), Text(text="nowiki"), TagCloseClose()] | |||||
--- | |||||
name: unparsable_with_intermediates_normalize | |||||
label: an unparsable tag with intermediate tags inside of it, requiring normalization | |||||
input: "<nowiki><ref></ref></nowIKI >" | |||||
output: [TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="<ref></ref>"), TagOpenClose(), Text(text="nowIKI "), TagCloseClose()] |
@@ -244,6 +244,13 @@ output: [Text(text="''"), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagClos | |||||
--- | --- | ||||
name: unending_bold_and_italics | |||||
label: five ticks (bold and italics) that don't end | |||||
input: "'''''testing" | |||||
output: [Text(text="'''''testing")] | |||||
--- | |||||
name: complex_ul | name: complex_ul | ||||
label: ul with a lot in it | label: ul with a lot in it | ||||
input: "* this is a test of an [[Unordered list|ul]] with {{plenty|of|stuff}}" | input: "* this is a test of an [[Unordered list|ul]] with {{plenty|of|stuff}}" | ||||
@@ -440,6 +447,13 @@ output: [TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Tag | |||||
--- | --- | ||||
name: dt_dd_mix4 | |||||
label: another example of correct dt/dd usage, with a trigger for a specific parse route | |||||
input: ";foo]:bar" | |||||
output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="foo]"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="bar")] | |||||
--- | |||||
name: ul_ol_dt_dd_mix | name: ul_ol_dt_dd_mix | ||||
label: an assortment of uls, ols, dds, and dts | label: an assortment of uls, ols, dds, and dts | ||||
input: ";:#*foo\n:#*;foo\n#*;:foo\n*;:#foo" | input: ";:#*foo\n:#*;foo\n#*;:foo\n*;:#foo" | ||||
@@ -376,6 +376,20 @@ output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}} | |||||
--- | --- | ||||
name: newlines_spaces | |||||
label: newlines in the middle of a template name, followed by spaces | |||||
input: "{{foo\n }}" | |||||
output: [TemplateOpen(), Text(text="foo\n "), TemplateClose()] | |||||
--- | |||||
name: newlines_spaces_param | |||||
label: newlines in the middle of a template name, followed by spaces, with a parameter | |||||
input: "{{foo\n |bar=baz}}" | |||||
output: [TemplateOpen(), Text(text="foo\n "), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||||
--- | |||||
name: invalid_name_left_brace_middle | name: invalid_name_left_brace_middle | ||||
label: invalid characters in template name: left brace in middle | label: invalid characters in template name: left brace in middle | ||||
input: "{{foo{bar}}" | input: "{{foo{bar}}" | ||||