Conflicts: tests/test_builder.py
tags/v0.3
@@ -1,5 +1,6 @@ | |||||
*.pyc | *.pyc | ||||
*.so | *.so | ||||
*.dll | |||||
*.egg | *.egg | ||||
*.egg-info | *.egg-info | ||||
.DS_Store | .DS_Store | ||||
@@ -0,0 +1,41 @@ | |||||
v0.3 (unreleased): | |||||
- Various fixes and cleanup. | |||||
v0.2 (released June 20, 2013): | |||||
- The parser now fully supports Python 3 in addition to Python 2.7. | |||||
- Added a C tokenizer extension that is significantly faster than its Python | |||||
equivalent. It is enabled by default (if available) and can be toggled by | |||||
setting `mwparserfromhell.parser.use_c` to a boolean value. | |||||
- Added a complete set of unit tests covering parsing and wikicode | |||||
manipulation. | |||||
- Renamed Wikicode.filter_links() to filter_wikilinks() (applies to ifilter as | |||||
well). | |||||
- Added filter methods for Arguments, Comments, Headings, and HTMLEntities. | |||||
- Added 'before' param to Template.add(); renamed 'force_nonconformity' to | |||||
'preserve_spacing'. | |||||
- Added 'include_lead' param to Wikicode.get_sections(). | |||||
- Removed 'flat' param from Wikicode.get_sections(). | |||||
- Removed 'force_no_field' param from Template.remove(). | |||||
- Added support for Travis CI. | |||||
- Added note about Windows build issue in the README. | |||||
- The tokenizer will limit itself to a realistic recursion depth to prevent | |||||
errors and unreasonably long parse times. | |||||
- Fixed how some nodes' attribute setters handle input. | |||||
- Fixed multiple bugs in the tokenizer's handling of invalid markup. | |||||
- Fixed bugs in the implementation of SmartList and StringMixIn. | |||||
- Fixed some broken example code in the README; other copyedits. | |||||
- Other bugfixes and code cleanup. | |||||
v0.1.1 (released September 21, 2012): | |||||
- Added support for Comments (<!-- foo -->) and Wikilinks ([[foo]]). | |||||
- Added corresponding ifilter_links() and filter_links() methods to Wikicode. | |||||
- Fixed a bug when parsing incomplete templates. | |||||
- Fixed strip_code() to affect the contents of headings. | |||||
- Various copyedits in documentation and comments. | |||||
v0.1 (released August 23, 2012): | |||||
- Initial release. |
@@ -9,7 +9,8 @@ mwparserfromhell | |||||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | ||||
wikicode. It supports Python 2 and Python 3. | wikicode. It supports Python 2 and Python 3. | ||||
Developed by Earwig_ with help from `Σ`_. | |||||
Developed by Earwig_ with help from `Σ`_. Full documentation is available on | |||||
ReadTheDocs_. | |||||
Installation | Installation | ||||
------------ | ------------ | ||||
@@ -142,6 +143,7 @@ following code (via the API_):: | |||||
return mwparserfromhell.parse(text) | return mwparserfromhell.parse(text) | ||||
.. _MediaWiki: http://mediawiki.org | .. _MediaWiki: http://mediawiki.org | ||||
.. _ReadTheDocs: http://mwparserfromhell.readthedocs.org | |||||
.. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig | ||||
.. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | .. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 | ||||
.. _Python Package Index: http://pypi.python.org | .. _Python Package Index: http://pypi.python.org | ||||
@@ -0,0 +1,66 @@ | |||||
Changelog | |||||
========= | |||||
v0.3 | |||||
---- | |||||
Unreleased | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.2...develop>`__): | |||||
- Various fixes and cleanup. | |||||
v0.2 | |||||
---- | |||||
`Released June 20, 2013 <https://github.com/earwig/mwparserfromhell/tree/v0.2>`_ | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.1.1...v0.2>`__): | |||||
- The parser now fully supports Python 3 in addition to Python 2.7. | |||||
- Added a C tokenizer extension that is significantly faster than its Python | |||||
equivalent. It is enabled by default (if available) and can be toggled by | |||||
setting :py:attr:`mwparserfromhell.parser.use_c` to a boolean value. | |||||
- Added a complete set of unit tests covering parsing and wikicode | |||||
manipulation. | |||||
- Renamed :py:meth:`.filter_links` to :py:meth:`.filter_wikilinks` (applies to | |||||
:py:meth:`.ifilter` as well). | |||||
- Added filter methods for :py:class:`Arguments <.Argument>`, | |||||
:py:class:`Comments <.Comment>`, :py:class:`Headings <.Heading>`, and | |||||
:py:class:`HTMLEntities <.HTMLEntity>`. | |||||
- Added *before* param to :py:meth:`Template.add() <.Template.add>`; renamed | |||||
*force_nonconformity* to *preserve_spacing*. | |||||
- Added *include_lead* param to :py:meth:`Wikicode.get_sections() | |||||
<.get_sections>`. | |||||
- Removed *flat* param from :py:meth:`.get_sections`. | |||||
- Removed *force_no_field* param from :py:meth:`Template.remove() | |||||
<.Template.remove>`. | |||||
- Added support for Travis CI. | |||||
- Added note about Windows build issue in the README. | |||||
- The tokenizer will limit itself to a realistic recursion depth to prevent | |||||
errors and unreasonably long parse times. | |||||
- Fixed how some nodes' attribute setters handle input. | |||||
- Fixed multiple bugs in the tokenizer's handling of invalid markup. | |||||
- Fixed bugs in the implementation of :py:class:`.SmartList` and | |||||
:py:class:`.StringMixIn`. | |||||
- Fixed some broken example code in the README; other copyedits. | |||||
- Other bugfixes and code cleanup. | |||||
v0.1.1 | |||||
------ | |||||
`Released September 21, 2012 <https://github.com/earwig/mwparserfromhell/tree/v0.1.1>`_ | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.1...v0.1.1>`__): | |||||
- Added support for :py:class:`Comments <.Comment>` (``<!-- foo -->``) and | |||||
:py:class:`Wikilinks <.Wikilink>` (``[[foo]]``). | |||||
- Added corresponding :py:meth:`.ifilter_links` and :py:meth:`.filter_links` | |||||
methods to :py:class:`.Wikicode`. | |||||
- Fixed a bug when parsing incomplete templates. | |||||
- Fixed :py:meth:`.strip_code` to affect the contents of headings. | |||||
- Various copyedits in documentation and comments. | |||||
v0.1 | |||||
---- | |||||
`Released August 23, 2012 <https://github.com/earwig/mwparserfromhell/tree/v0.1>`_: | |||||
- Initial release. |
@@ -1,5 +1,5 @@ | |||||
MWParserFromHell v0.2 Documentation | |||||
=================================== | |||||
MWParserFromHell v\ |version| Documentation | |||||
=========================================== | |||||
:py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python | :py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python | ||||
package that provides an easy-to-use and outrageously powerful parser for | package that provides an easy-to-use and outrageously powerful parser for | ||||
@@ -41,6 +41,7 @@ Contents | |||||
usage | usage | ||||
integration | integration | ||||
changelog | |||||
API Reference <api/modules> | API Reference <api/modules> | ||||
@@ -31,7 +31,7 @@ from __future__ import unicode_literals | |||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" | __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" | ||||
__license__ = "MIT License" | __license__ = "MIT License" | ||||
__version__ = "0.2.dev" | |||||
__version__ = "0.3.dev" | |||||
__email__ = "ben.kurtovic@verizon.net" | __email__ = "ben.kurtovic@verizon.net" | ||||
from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode | from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode | ||||
@@ -293,7 +293,7 @@ class Template(Node): | |||||
""" | """ | ||||
name = name.strip() if isinstance(name, basestring) else str(name) | name = name.strip() if isinstance(name, basestring) else str(name) | ||||
removed = False | removed = False | ||||
to_remove =[] | |||||
to_remove = [] | |||||
for i, param in enumerate(self.params): | for i, param in enumerate(self.params): | ||||
if param.name.strip() == name: | if param.name.strip() == name: | ||||
if keep_field: | if keep_field: | ||||
@@ -23,9 +23,16 @@ SOFTWARE. | |||||
#include "tokenizer.h" | #include "tokenizer.h" | ||||
double log2(double n) | |||||
/* | |||||
Given a context, return the heading level encoded within it. | |||||
*/ | |||||
static int heading_level_from_context(int n) | |||||
{ | { | ||||
return log(n) / log(2); | |||||
int level; | |||||
n /= LC_HEADING_LEVEL_1; | |||||
for (level = 1; n > 1; n >>= 1) | |||||
level++; | |||||
return level; | |||||
} | } | ||||
static PyObject* | static PyObject* | ||||
@@ -175,6 +182,9 @@ Tokenizer_push_textbuffer(Tokenizer* self) | |||||
return 0; | return 0; | ||||
} | } | ||||
/* | |||||
Pop and deallocate the top token stack/context/textbuffer. | |||||
*/ | |||||
static void | static void | ||||
Tokenizer_delete_top_of_stack(Tokenizer* self) | Tokenizer_delete_top_of_stack(Tokenizer* self) | ||||
{ | { | ||||
@@ -857,7 +867,7 @@ Tokenizer_handle_heading_end(Tokenizer* self) | |||||
best++; | best++; | ||||
self->head++; | self->head++; | ||||
} | } | ||||
current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; | |||||
current = heading_level_from_context(self->topstack->context); | |||||
level = current > best ? (best > 6 ? 6 : best) : | level = current > best ? (best > 6 ? 6 : best) : | ||||
(current > 6 ? 6 : current); | (current > 6 ? 6 : current); | ||||
after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); | after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); | ||||
@@ -181,6 +181,7 @@ typedef struct { | |||||
/* Function prototypes: */ | /* Function prototypes: */ | ||||
static int heading_level_from_context(int); | |||||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | ||||
static struct Textbuffer* Textbuffer_new(void); | static struct Textbuffer* Textbuffer_new(void); | ||||
static void Tokenizer_dealloc(Tokenizer*); | static void Tokenizer_dealloc(Tokenizer*); | ||||
@@ -253,12 +253,12 @@ class StringMixIn(object): | |||||
if py3k: | if py3k: | ||||
@staticmethod | @staticmethod | ||||
@inheritdoc | @inheritdoc | ||||
def maketrans(self, x, y=None, z=None): | |||||
def maketrans(x, y=None, z=None): | |||||
if z is None: | if z is None: | ||||
if y is None: | if y is None: | ||||
return self.__unicode__.maketrans(x) | |||||
return self.__unicode__.maketrans(x, y) | |||||
return self.__unicode__.maketrans(x, y, z) | |||||
return str.maketrans(x) | |||||
return str.maketrans(x, y) | |||||
return str.maketrans(x, y, z) | |||||
@inheritdoc | @inheritdoc | ||||
def partition(self, sep): | def partition(self, sep): | ||||
@@ -168,7 +168,7 @@ class Wikicode(StringMixIn): | |||||
doc = """Iterate over {0}. | doc = """Iterate over {0}. | ||||
This is equivalent to :py:meth:`{1}` with *forcetype* set to | This is equivalent to :py:meth:`{1}` with *forcetype* set to | ||||
:py:class:`~.{2}`. | |||||
:py:class:`~{2.__module__}.{2.__name__}`. | |||||
""" | """ | ||||
make_ifilter = lambda ftype: (lambda self, **kw: | make_ifilter = lambda ftype: (lambda self, **kw: | ||||
self.ifilter(forcetype=ftype, **kw)) | self.ifilter(forcetype=ftype, **kw)) | ||||
@@ -177,8 +177,8 @@ class Wikicode(StringMixIn): | |||||
for name, ftype in (meths.items() if py3k else meths.iteritems()): | for name, ftype in (meths.items() if py3k else meths.iteritems()): | ||||
ifilter = make_ifilter(ftype) | ifilter = make_ifilter(ftype) | ||||
filter = make_filter(ftype) | filter = make_filter(ftype) | ||||
ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__) | |||||
filter.__doc__ = doc.format(name, "filter", ftype.__name__) | |||||
ifilter.__doc__ = doc.format(name, "ifilter", ftype) | |||||
filter.__doc__ = doc.format(name, "filter", ftype) | |||||
setattr(cls, "ifilter_" + name, ifilter) | setattr(cls, "ifilter_" + name, ifilter) | ||||
setattr(cls, "filter_" + name, filter) | setattr(cls, "filter_" + name, filter) | ||||
@@ -109,7 +109,7 @@ class TokenizerTestCase(object): | |||||
def build(cls): | def build(cls): | ||||
"""Load and install all tests from the 'tokenizer' directory.""" | """Load and install all tests from the 'tokenizer' directory.""" | ||||
def load_file(filename): | def load_file(filename): | ||||
with open(filename, "r") as fp: | |||||
with open(filename, "rU") as fp: | |||||
text = fp.read() | text = fp.read() | ||||
if not py3k: | if not py3k: | ||||
text = text.decode("utf8") | text = text.decode("utf8") | ||||
@@ -414,10 +414,10 @@ class TestStringMixIn(unittest.TestCase): | |||||
self.assertEqual("Fake String", str1.title()) | self.assertEqual("Fake String", str1.title()) | ||||
if py3k: | if py3k: | ||||
table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", | |||||
117: "5"}) | |||||
table2 = str.maketrans("aeiou", "12345") | |||||
table3 = str.maketrans("aeiou", "12345", "rts") | |||||
table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", | |||||
111: "4", 117: "5"}) | |||||
table2 = StringMixIn.maketrans("aeiou", "12345") | |||||
table3 = StringMixIn.maketrans("aeiou", "12345", "rts") | |||||
self.assertEqual("f1k2 str3ng", str1.translate(table1)) | self.assertEqual("f1k2 str3ng", str1.translate(table1)) | ||||
self.assertEqual("f1k2 str3ng", str1.translate(table2)) | self.assertEqual("f1k2 str3ng", str1.translate(table2)) | ||||
self.assertEqual("f1k2 3ng", str1.translate(table3)) | self.assertEqual("f1k2 3ng", str1.translate(table3)) | ||||