From 81954c50acd88aba523c5064e63a8316692997fb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 19 Jun 2013 00:22:30 -0400 Subject: [PATCH 01/10] Removing a useless, skipped test. --- tests/test_builder.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 903d144..2d44b6c 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -190,11 +190,6 @@ class TestBuilder(TreeEqualityTestCase): for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) - @unittest.skip("holding this until feature/html_tags is ready") - def test_tag(self): - """tests for building Tag nodes""" - pass - def test_integration(self): """a test for building a combination of templates together""" # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} From 3fb8f3214c91bcd63b5fe4e3a0206a05f8038c39 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 19 Jun 2013 00:39:46 -0400 Subject: [PATCH 02/10] Fix StringMixIn.maketrans() on Py3k. - Make a test in Py3k actually use StringMixIn instead of str. - Minor cosmetic fix. 
--- mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/string_mixin.py | 8 ++++---- tests/test_string_mixin.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 3834d41..6dfc4f0 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -293,7 +293,7 @@ class Template(Node): """ name = name.strip() if isinstance(name, basestring) else str(name) removed = False - to_remove =[] + to_remove = [] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 89c1bc0..a406401 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -253,12 +253,12 @@ class StringMixIn(object): if py3k: @staticmethod @inheritdoc - def maketrans(self, x, y=None, z=None): + def maketrans(x, y=None, z=None): if z is None: if y is None: - return self.__unicode__.maketrans(x) - return self.__unicode__.maketrans(x, y) - return self.__unicode__.maketrans(x, y, z) + return str.maketrans(x) + return str.maketrans(x, y) + return str.maketrans(x, y, z) @inheritdoc def partition(self, sep): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 306f2fd..b829bb2 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -414,10 +414,10 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("Fake String", str1.title()) if py3k: - table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", - 117: "5"}) - table2 = str.maketrans("aeiou", "12345") - table3 = str.maketrans("aeiou", "12345", "rts") + table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", + 111: "4", 117: "5"}) + table2 = StringMixIn.maketrans("aeiou", "12345") + table3 = StringMixIn.maketrans("aeiou", "12345", "rts") self.assertEqual("f1k2 str3ng", str1.translate(table1)) self.assertEqual("f1k2 str3ng", 
str1.translate(table2)) self.assertEqual("f1k2 3ng", str1.translate(table3)) From 22d7995d9b6c47407e0f130df8146debe03c6066 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 19 Jun 2013 21:07:41 -0400 Subject: [PATCH 03/10] Fix newline behavior when loading test files on Windows. --- tests/_test_tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 382a9bf..c1d49cb 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -109,7 +109,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" def load_file(filename): - with open(filename, "r") as fp: + with open(filename, "rU") as fp: text = fp.read() if not py3k: text = text.decode("utf8") From 25a9f4fe327d5fc95a5b1fb8302934a2b1d03294 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 19 Jun 2013 21:08:34 -0400 Subject: [PATCH 04/10] Add .dll to .gitignore for builds on Windows. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ec4e8ca..4068716 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.pyc *.so +*.dll *.egg *.egg-info .DS_Store From a68946757758a7c6936dbe8c8c9295ef263ca97d Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 20 Jun 2013 16:17:39 -0400 Subject: [PATCH 05/10] Replace broken log2 function; add a missing comment. --- mwparserfromhell/parser/tokenizer.c | 16 +++++++++++++--- mwparserfromhell/parser/tokenizer.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 939f30c..df65d0e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -23,9 +23,16 @@ SOFTWARE. #include "tokenizer.h" -double log2(double n) +/* + Given a context, return the heading level encoded within it. 
+*/ +static int heading_level_from_context(int n) { - return log(n) / log(2); + int level; + n /= LC_HEADING_LEVEL_1; + for (level = 1; n > 1; n >>= 1) + level++; + return level; } static PyObject* @@ -175,6 +182,9 @@ Tokenizer_push_textbuffer(Tokenizer* self) return 0; } +/* + Pop and deallocate the top token stack/context/textbuffer. +*/ static void Tokenizer_delete_top_of_stack(Tokenizer* self) { @@ -858,7 +868,7 @@ Tokenizer_handle_heading_end(Tokenizer* self) best++; self->head++; } - current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; + current = heading_level_from_context(self->topstack->context); level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index cdc0cca..1f58c49 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -181,6 +181,7 @@ typedef struct { /* Function prototypes: */ +static int heading_level_from_context(int); static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); static struct Textbuffer* Textbuffer_new(void); static void Tokenizer_dealloc(Tokenizer*); From 72473b433a8219c28245c0d560e9bb30f4df30de Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 17:47:13 -0400 Subject: [PATCH 06/10] Adding a changelog (closes #23) --- CHANGELOG | 33 +++++++++++++++++++++++++++++++ docs/changelog.rst | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 3 files changed, 92 insertions(+) create mode 100644 CHANGELOG create mode 100644 docs/changelog.rst diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..9772f8b --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,33 @@ +v0.1.1 (19da4d2144) to v0.2: + +- The parser now fully supports Python 3 in addition to Python 2.7. +- Added a C tokenizer extension that is significantly faster than its Python + equivalent. 
It is enabled by default (if available) and can be toggled by + setting `mwparserfromhell.parser.use_c` to a boolean value. +- Added a complete set of unit tests covering parsing and wikicode + manipulation. +- Renamed Wikicode.filter_links() to filter_wikilinks() (applies to ifilter as + well). +- Added filter methods for Arguments, Comments, Headings, and HTMLEntities. +- Added 'before' param to Template.add(); renamed 'force_nonconformity' to + 'preserve_spacing'. +- Added 'include_lead' param to Wikicode.get_sections(). +- Removed 'flat' param from Wikicode.get_sections(). +- Removed 'force_no_field' param from Template.remove(). +- Added support for Travis CI. +- Added note about Windows build issue in the README. +- The tokenizer will limit itself to a realistic recursion depth to prevent + errors and unreasonably long parse times. +- Fixed how some nodes' attribute setters handle input. +- Fixed multiple bugs in the tokenizer's handling of invalid markup. +- Fixed bugs in the implementation of SmartList and StringMixIn. +- Fixed some broken example code in the README; other copyedits. +- Other bugfixes and code cleanup. + +v0.1 (ba94938fe8) to v0.1.1 (19da4d2144): + +- Added support for Comments (<!-- foo -->) and Wikilinks ([[foo]]). +- Added corresponding ifilter_links() and filter_links() methods to Wikicode. +- Fixed a bug when parsing incomplete templates. +- Fixed strip_code() to affect the contents of headings. +- Various copyedits in documentation and comments. diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 0000000..0e8bbef --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,58 @@ +Changelog +========= + +v0.2 +---- + +19da4d2144_ to master_ (released June 20, 2013) + +- The parser now fully supports Python 3 in addition to Python 2.7. +- Added a C tokenizer extension that is significantly faster than its Python + equivalent. 
It is enabled by default (if available) and can be toggled by + setting :py:attr:`mwparserfromhell.parser.use_c` to a boolean value. +- Added a complete set of unit tests covering parsing and wikicode + manipulation. +- Renamed :py:meth:`.filter_links` to :py:meth:`.filter_wikilinks` (applies to + :py:meth:`.ifilter` as well). +- Added filter methods for :py:class:`Arguments <.Argument>`, + :py:class:`Comments <.Comment>`, :py:class:`Headings <.Heading>`, and + :py:class:`HTMLEntities <.HTMLEntity>`. +- Added *before* param to :py:meth:`Template.add() <.Template.add>`; renamed + *force_nonconformity* to *preserve_spacing*. +- Added *include_lead* param to :py:meth:`Wikicode.get_sections() + <.get_sections>`. +- Removed *flat* param from :py:meth:`.get_sections`. +- Removed *force_no_field* param from :py:meth:`Template.remove() + <.Template.remove>`. +- Added support for Travis CI. +- Added note about Windows build issue in the README. +- The tokenizer will limit itself to a realistic recursion depth to prevent + errors and unreasonably long parse times. +- Fixed how some nodes' attribute setters handle input. +- Fixed multiple bugs in the tokenizer's handling of invalid markup. +- Fixed bugs in the implementation of :py:class:`.SmartList` and + :py:class:`.StringMixIn`. +- Fixed some broken example code in the README; other copyedits. +- Other bugfixes and code cleanup. + +v0.1.1 +------ + +ba94938fe8_ to 19da4d2144_ (released September 21, 2012) + +- Added support for :py:class:`Comments <.Comment>` (``<!-- foo -->``) and + :py:class:`Wikilinks <.Wikilink>` (``[[foo]]``). +- Added corresponding :py:meth:`.ifilter_links` and :py:meth:`.filter_links` + methods to :py:class:`.Wikicode`. +- Fixed a bug when parsing incomplete templates. +- Fixed :py:meth:`.strip_code` to affect the contents of headings. +- Various copyedits in documentation and comments. + +v0.1 +---- + +ba94938fe8_ (released August 23, 2012) + +.. 
_master: https://github.com/earwig/mwparserfromhell/tree/v0.2 +.. _19da4d2144: https://github.com/earwig/mwparserfromhell/tree/v0.1.1 +.. _ba94938fe8: https://github.com/earwig/mwparserfromhell/tree/v0.1 diff --git a/docs/index.rst b/docs/index.rst index 4b4c392..4355b61 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -41,6 +41,7 @@ Contents usage integration + changelog API Reference From bbaf09dbf8fc2795c424f0934e4dce9924edb009 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 18:07:41 -0400 Subject: [PATCH 07/10] Fix docstrings of generated filter methods. --- mwparserfromhell/wikicode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 581707d..4ec889e 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -168,7 +168,7 @@ class Wikicode(StringMixIn): doc = """Iterate over {0}. This is equivalent to :py:meth:`{1}` with *forcetype* set to - :py:class:`~.{2}`. + :py:class:`~{2.__module__}.{2.__name__}`. 
""" make_ifilter = lambda ftype: (lambda self, **kw: self.ifilter(forcetype=ftype, **kw)) @@ -177,8 +177,8 @@ class Wikicode(StringMixIn): for name, ftype in (meths.items() if py3k else meths.iteritems()): ifilter = make_ifilter(ftype) filter = make_filter(ftype) - ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__) - filter.__doc__ = doc.format(name, "filter", ftype.__name__) + ifilter.__doc__ = doc.format(name, "ifilter", ftype) + filter.__doc__ = doc.format(name, "filter", ftype) setattr(cls, "ifilter_" + name, ifilter) setattr(cls, "filter_" + name, filter) From edf6a3a8a6ad4c31cf8649a273b4e4d0e275003a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 18:13:52 -0400 Subject: [PATCH 08/10] release/0.2 --- mwparserfromhell/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 99bc0c2..5db2d4c 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.2.dev" +__version__ = "0.2" __email__ = "ben.kurtovic@verizon.net" from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode From 58d9194a2c4620e948024bdb819bd1f484071227 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 21 Jun 2013 00:32:26 -0400 Subject: [PATCH 09/10] Version bump for v0.3; fix permissions on compat.py. 
--- CHANGELOG | 2 +- docs/changelog.rst | 4 ++-- docs/index.rst | 2 +- mwparserfromhell/__init__.py | 2 +- mwparserfromhell/compat.py | 0 5 files changed, 5 insertions(+), 5 deletions(-) mode change 100755 => 100644 mwparserfromhell/compat.py diff --git a/CHANGELOG b/CHANGELOG index 9772f8b..961d33d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -v0.1.1 (19da4d2144) to v0.2: +v0.1.1 (19da4d2144) to v0.2 (edf6a3a8a6): - The parser now fully supports Python 3 in addition to Python 2.7. - Added a C tokenizer extension that is significantly faster than its Python diff --git a/docs/changelog.rst b/docs/changelog.rst index 0e8bbef..0f7347a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,7 +4,7 @@ Changelog v0.2 ---- -19da4d2144_ to master_ (released June 20, 2013) +19da4d2144_ to edf6a3a8a6_ (released June 20, 2013) - The parser now fully supports Python 3 in addition to Python 2.7. - Added a C tokenizer extension that is significantly faster than its Python @@ -53,6 +53,6 @@ v0.1 ba94938fe8_ (released August 23, 2012) -.. _master: https://github.com/earwig/mwparserfromhell/tree/v0.2 +.. _edf6a3a8a6: https://github.com/earwig/mwparserfromhell/tree/v0.2 .. _19da4d2144: https://github.com/earwig/mwparserfromhell/tree/v0.1.1 .. 
_ba94938fe8: https://github.com/earwig/mwparserfromhell/tree/v0.1 diff --git a/docs/index.rst b/docs/index.rst index 4355b61..f2e3345 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,4 +1,4 @@ -MWParserFromHell v0.2 Documentation +MWParserFromHell v0.3 Documentation =================================== :py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 5db2d4c..738d4c2 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.2" +__version__ = "0.3.dev" __email__ = "ben.kurtovic@verizon.net" from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py old mode 100755 new mode 100644 From 7b6b46da953948165072832d1979e0377ddece4a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 22 Jun 2013 22:24:36 -0400 Subject: [PATCH 10/10] Some documentation cleanup. --- CHANGELOG | 12 ++++++++++-- README.rst | 4 +++- docs/changelog.rst | 20 ++++++++++++++------ docs/index.rst | 4 ++-- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 961d33d..cbe2933 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,8 @@ -v0.1.1 (19da4d2144) to v0.2 (edf6a3a8a6): +v0.3 (unreleased): + +- Various fixes and cleanup. + +v0.2 (released June 20, 2013): - The parser now fully supports Python 3 in addition to Python 2.7. - Added a C tokenizer extension that is significantly faster than its Python @@ -24,10 +28,14 @@ v0.1.1 (19da4d2144) to v0.2 (edf6a3a8a6): - Fixed some broken example code in the README; other copyedits. - Other bugfixes and code cleanup. 
-v0.1 (ba94938fe8) to v0.1.1 (19da4d2144): +v0.1.1 (released September 21, 2012): - Added support for Comments (<!-- foo -->) and Wikilinks ([[foo]]). - Added corresponding ifilter_links() and filter_links() methods to Wikicode. - Fixed a bug when parsing incomplete templates. - Fixed strip_code() to affect the contents of headings. - Various copyedits in documentation and comments. + +v0.1 (released August 23, 2012): + +- Initial release. diff --git a/README.rst b/README.rst index 77c01eb..df4d732 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,8 @@ mwparserfromhell that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 2 and Python 3. -Developed by Earwig_ with help from `Σ`_. +Developed by Earwig_ with help from `Σ`_. Full documentation is available on +ReadTheDocs_. Installation ------------ @@ -142,6 +143,7 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _MediaWiki: http://mediawiki.org +.. _ReadTheDocs: http://mwparserfromhell.readthedocs.org .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig .. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 .. _Python Package Index: http://pypi.python.org diff --git a/docs/changelog.rst b/docs/changelog.rst index 0f7347a..4bf86b7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,10 +1,19 @@ Changelog ========= +v0.3 +---- + +Unreleased +(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.2...develop>`__): + +- Various fixes and cleanup. + v0.2 ---- -19da4d2144_ to edf6a3a8a6_ (released June 20, 2013) +`Released June 20, 2013 <https://github.com/earwig/mwparserfromhell/tree/v0.2>`_ +(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.1.1...v0.2>`__): - The parser now fully supports Python 3 in addition to Python 2.7. - Added a C tokenizer extension that is significantly faster than its Python @@ -38,7 +47,8 @@ v0.2 v0.1.1 ------ -ba94938fe8_ to 19da4d2144_ (released September 21, 2012) +`Released September 21, 2012 <https://github.com/earwig/mwparserfromhell/tree/v0.1.1>`_ +(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.1...v0.1.1>`__): - Added support for :py:class:`Comments <.Comment>` (``<!-- foo -->``) and :py:class:`Wikilinks <.Wikilink>` (``[[foo]]``). 
@@ -51,8 +61,6 @@ ba94938fe8_ to 19da4d2144_ (released September 21, 2012) v0.1 ---- -ba94938fe8_ (released August 23, 2012) +`Released August 23, 2012 <https://github.com/earwig/mwparserfromhell/tree/v0.1>`_: -.. _edf6a3a8a6: https://github.com/earwig/mwparserfromhell/tree/v0.2 -.. _19da4d2144: https://github.com/earwig/mwparserfromhell/tree/v0.1.1 -.. _ba94938fe8: https://github.com/earwig/mwparserfromhell/tree/v0.1 +- Initial release. diff --git a/docs/index.rst b/docs/index.rst index f2e3345..0603daf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ -MWParserFromHell v0.3 Documentation -=================================== +MWParserFromHell v\ |version| Documentation +=========================================== :py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for