From e5ad4639ff966b2d58a0198b538c27d67c6be693 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 14 Sep 2013 18:15:48 -0400 Subject: [PATCH 01/39] Wikicode.i?filter*()'s matches argument now takes functions (closes #47) Bump version to 0.4.dev. --- CHANGELOG | 5 +++++ docs/changelog.rst | 10 ++++++++++ mwparserfromhell/__init__.py | 2 +- mwparserfromhell/wikicode.py | 20 ++++++++++++-------- tests/test_wikicode.py | 20 +++++++++++++------- 5 files changed, 41 insertions(+), 16 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 99eff38..230236b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +v0.4 (unreleased): + +- The 'matches' argument of Wikicode's filter methods now accepts a function + (taking one argument, a Node, and returning a bool) in addition to a regex. + v0.3.2 (released September 1, 2013): - Added support for Python 3.2 (along with current support for 3.3 and 2.7). diff --git a/docs/changelog.rst b/docs/changelog.rst index e72baef..ed8372d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,16 @@ Changelog ========= +v0.4 +---- + +Unreleased +(`changes `__): + +- The *matches* argument of :py:class:`Wikicode's <.Wikicode>` + :py:meth:`.filter` methods now accepts a function (taking one argument, a + :py:class:`.Node`, and returning a bool) in addition to a regex. + v0.3.2 ------ diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 6569d96..3c011d0 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.3.2" +__version__ = "0.4.dev" __email__ = "ben.kurtovic@verizon.net" from . 
import (compat, definitions, nodes, parser, smart_list, string_mixin, diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 08fd469..be751ed 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -378,17 +378,21 @@ class Wikicode(StringMixIn): """Iterate over nodes in our list matching certain conditions. If *recursive* is ``True``, we will iterate over our children and all - descendants of our children, otherwise just our immediate children. If - *matches* is given, we will only yield the nodes that match the given - regular expression (with :py:func:`re.search`). The default flags used - are :py:const:`re.IGNORECASE`, :py:const:`re.DOTALL`, and - :py:const:`re.UNICODE`, but custom flags can be specified by passing - *flags*. If *forcetype* is given, only nodes that are instances of this - type are yielded. + of their descendants, otherwise just our immediate children. If + *forcetype* is given, only nodes that are instances of this type are + yielded. *matches* can be used to further restrict the nodes, either as + a function (taking a single :py:class:`.Node` and returning a boolean) + or a regular expression (matched against the node's string + representation with :py:func:`re.search`). If *matches* is a regex, the + flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`, + :py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can + be specified by passing *flags*. 
""" + if matches and not callable(matches): + pat, matches = matches, lambda obj: re.search(pat, str(obj), flags) for node in (self._get_all_nodes(self) if recursive else self.nodes): if not forcetype or isinstance(node, forcetype): - if not matches or re.search(matches, str(node), flags): + if not matches or matches(node): yield node def filter(self, recursive=True, matches=None, flags=FLAGS, diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 14d801c..31fa82f 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -256,7 +256,7 @@ class TestWikicode(TreeEqualityTestCase): def genlist(gen): self.assertIsInstance(gen, GeneratorType) return list(gen) - ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw))) + ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k))) code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") for func in (code.filter, ifilter(code)): @@ -292,21 +292,27 @@ class TestWikicode(TreeEqualityTestCase): "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], func(recursive=True, forcetype=Template)) - code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}") + code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}") for func in (code3.filter, ifilter(code3)): - self.assertEqual(["{{foobar}}", "{{FOO}}"], func(recursive=False, matches=r"foo")) + self.assertEqual(["{{foobar}}", "{{barfoo}}"], + func(False, matches=lambda node: "foo" in node)) + self.assertEqual(["{{foobar}}", "{{FOO}}", "{{barfoo}}"], + func(False, matches=r"foo")) self.assertEqual(["{{foobar}}", "{{FOO}}"], - func(recursive=False, matches=r"^{{foo.*?}}")) + func(matches=r"^{{foo.*?}}")) self.assertEqual(["{{foobar}}"], - func(recursive=False, matches=r"^{{foo.*?}}", flags=re.UNICODE)) - self.assertEqual(["{{baz}}", "{{bz}}"], func(recursive=False, matches=r"^{{b.*?z")) - self.assertEqual(["{{baz}}"], func(recursive=False, matches=r"^{{b.+?z}}")) + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) 
+ self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], code2.filter_templates(recursive=False)) self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], code2.filter_templates(recursive=True)) + + self.assertEqual(["{{foobar}}"], code3.filter_templates( + matches=lambda node: node.name.matches("Foobar"))) self.assertEqual(["{{baz}}", "{{bz}}"], code3.filter_templates(matches=r"^{{b.*?z")) self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) From 5e6c994c2cbc816bd1d8107726b10b1c049d7aad Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 21 Sep 2013 14:48:05 -0400 Subject: [PATCH 02/39] Wikicode.matches() now accepts tuples (closes #48) --- CHANGELOG | 2 ++ docs/changelog.rst | 3 +++ mwparserfromhell/wikicode.py | 18 ++++++++++++------ tests/test_wikicode.py | 8 ++++++++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 230236b..43309f6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ v0.4 (unreleased): - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. +- Wikicode.matches() now accepts a tuple of strings/Wikicode objects instead of + just a single string or Wikicode. v0.3.2 (released September 1, 2013): diff --git a/docs/changelog.rst b/docs/changelog.rst index ed8372d..7ac5c9a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,9 @@ Unreleased - The *matches* argument of :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods now accepts a function (taking one argument, a :py:class:`.Node`, and returning a bool) in addition to a regex. +- :py:meth:`.Wikicode.matches` now accepts a tuple of + strings/:py:class:`.Wikicode` objects instead of just a single string or + :py:class:`.Wikicode`. 
v0.3.2 ------ diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index be751ed..b7d2ab2 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -362,16 +362,22 @@ class Wikicode(StringMixIn): """Do a loose equivalency test suitable for comparing page names. *other* can be any string-like object, including - :py:class:`~.Wikicode`. This operation is symmetric; both sides are - adjusted. Specifically, whitespace and markup is stripped and the first - letter's case is normalized. Typical usage is + :py:class:`~.Wikicode`, or a tuple of these. This operation is + symmetric; both sides are adjusted. Specifically, whitespace and markup + is stripped and the first letter's case is normalized. Typical usage is ``if template.name.matches("stub"): ...``. """ + cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] + if a and b else a == b) this = self.strip_code().strip() + if isinstance(other, tuple): + for obj in other: + that = parse_anything(obj).strip_code().strip() + if cmp(this, that): + return True + return False that = parse_anything(other).strip_code().strip() - if not this or not that: - return this == that - return this[0].upper() + this[1:] == that[0].upper() + that[1:] + return cmp(this, that) def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 31fa82f..632c6b4 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -242,6 +242,7 @@ class TestWikicode(TreeEqualityTestCase): """test Wikicode.matches()""" code1 = parse("Cleanup") code2 = parse("\nstub") + code3 = parse("") self.assertTrue(code1.matches("Cleanup")) self.assertTrue(code1.matches("cleanup")) self.assertTrue(code1.matches(" cleanup\n")) @@ -250,6 +251,13 @@ class TestWikicode(TreeEqualityTestCase): self.assertTrue(code2.matches("stub")) self.assertTrue(code2.matches("Stub")) self.assertFalse(code2.matches("StuB")) + 
self.assertTrue(code1.matches(("cleanup", "stub"))) + self.assertTrue(code2.matches(("cleanup", "stub"))) + self.assertFalse(code2.matches(("StuB", "sTUb", "foobar"))) + self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub"))) + self.assertTrue(code3.matches("")) + self.assertTrue(code3.matches("")) + self.assertTrue(code3.matches(("a", "b", ""))) def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" From cf9055722959ed355a4295b1497ebd89f1403c8f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 15 Oct 2013 18:23:11 -0400 Subject: [PATCH 03/39] Template.has() should be passed ignore_empty=False by default. **Breaking change**; closes #51. --- CHANGELOG | 3 +++ docs/changelog.rst | 4 ++++ mwparserfromhell/nodes/template.py | 4 ++-- tests/test_template.py | 19 +++++++++++-------- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 43309f6..a22463a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ v0.4 (unreleased): +- Template.has() is now passed 'ignore_empty=False' by default instead of True. + This fixes a bug when adding parameters to templates with empty fields, and + is a breaking change if you rely on the default behavior. - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. - Wikicode.matches() now accepts a tuple of strings/Wikicode objects instead of diff --git a/docs/changelog.rst b/docs/changelog.rst index 7ac5c9a..7d9ced7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,10 @@ v0.4 Unreleased (`changes `__): +- :py:meth:`.Template.has` is now passed *ignore_empty=False* by default + instead of *True*. 
This fixes a bug when adding parameters to templates with + empty fields, **and is a breaking change if you rely on the default + behavior.** - The *matches* argument of :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods now accepts a function (taking one argument, a :py:class:`.Node`, and returning a bool) in addition to a regex. diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 1b4e3fa..7a9779b 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -174,7 +174,7 @@ class Template(Node): def name(self, value): self._name = parse_anything(value) - def has(self, name, ignore_empty=True): + def has(self, name, ignore_empty=False): """Return ``True`` if any parameter in the template is named *name*. With *ignore_empty*, ``False`` will be returned even if the template @@ -190,7 +190,7 @@ class Template(Node): return True return False - has_param = lambda self, name, ignore_empty=True: \ + has_param = lambda self, name, ignore_empty=False: \ self.has(name, ignore_empty) has_param.__doc__ = "Alias for :py:meth:`has`." 
diff --git a/tests/test_template.py b/tests/test_template.py index 26a2e39..2294baf 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -123,15 +123,15 @@ class TestTemplate(TreeEqualityTestCase): node3 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) - self.assertFalse(node1.has("foobar")) - self.assertTrue(node2.has(1)) - self.assertTrue(node2.has("abc")) - self.assertFalse(node2.has("def")) - self.assertTrue(node3.has("1")) - self.assertTrue(node3.has(" b ")) - self.assertFalse(node4.has("b")) - self.assertTrue(node3.has("b", False)) + self.assertFalse(node1.has("foobar", False)) + self.assertTrue(node2.has(1, False)) + self.assertTrue(node2.has("abc", False)) + self.assertFalse(node2.has("def", False)) + self.assertTrue(node3.has("1", False)) + self.assertTrue(node3.has(" b ", False)) self.assertTrue(node4.has("b", False)) + self.assertTrue(node3.has("b", True)) + self.assertFalse(node4.has("b", True)) def test_get(self): """test Template.get()""" @@ -223,6 +223,7 @@ class TestTemplate(TreeEqualityTestCase): pgenh("1", "c"), pgenh("2", "d")]) node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("f", "g")]) + node41 = Template(wraptext("a"), [pgenh("1", "")]) node1.add("e", "f", showkey=True) node2.add(2, "g", showkey=False) @@ -266,6 +267,7 @@ class TestTemplate(TreeEqualityTestCase): node38.add("1", "e") node39.add("1", "e") node40.add("d", "h", before="b") + node41.add(1, "b") self.assertEqual("{{a|b=c|d|e=f}}", node1) self.assertEqual("{{a|b=c|d|g}}", node2) @@ -312,6 +314,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{a|1=e|x=y|2=d}}", node38) self.assertEqual("{{a|x=y|e|d}}", node39) self.assertEqual("{{a|b=c|d=h|f=g}}", node40) + self.assertEqual("{{a|b}}", node41) def test_remove(self): """test Template.remove()""" From 38050f687845741daef97938fb3af39f03e76708 Mon Sep 17 00:00:00 2001 From: Ben 
Kurtovic Date: Tue, 15 Oct 2013 19:07:07 -0400 Subject: [PATCH 04/39] C code cleanup and speed improvements. --- CHANGELOG | 1 + docs/changelog.rst | 1 + mwparserfromhell/parser/tokenizer.c | 255 ++++++++++++++++++------------------ mwparserfromhell/parser/tokenizer.h | 8 +- 4 files changed, 132 insertions(+), 133 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a22463a..a00f8f3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,7 @@ v0.4 (unreleased): (taking one argument, a Node, and returning a bool) in addition to a regex. - Wikicode.matches() now accepts a tuple of strings/Wikicode objects instead of just a single string or Wikicode. +- C code cleanup and speed improvements. v0.3.2 (released September 1, 2013): diff --git a/docs/changelog.rst b/docs/changelog.rst index 7d9ced7..6708f0f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -17,6 +17,7 @@ Unreleased - :py:meth:`.Wikicode.matches` now accepts a tuple of strings/:py:class:`.Wikicode` objects instead of just a single string or :py:class:`.Wikicode`. +- C code cleanup and speed improvements. 
v0.3.2 ------ diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 609a595..1823006 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -31,7 +31,7 @@ static int is_marker(Py_UNICODE this) int i; for (i = 0; i < NUM_MARKERS; i++) { - if (*MARKERS[i] == this) + if (MARKERS[i] == this) return 1; } return 0; @@ -642,7 +642,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) PyObject *tokenlist; self->head += 2; - while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { + while (Tokenizer_READ(self, 0) == '{' && braces < MAX_BRACES) { self->head++; braces++; } @@ -674,8 +674,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) if (BAD_ROUTE) { char text[MAX_BRACES + 1]; RESET_ROUTE(); - for (i = 0; i < braces; i++) text[i] = *"{"; - text[braces] = *""; + for (i = 0; i < braces; i++) text[i] = '{'; + text[braces] = '\0'; if (Tokenizer_emit_text_then_stack(self, text)) { Py_XDECREF(text); return -1; @@ -872,7 +872,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) if (Tokenizer_push(self, LC_EXT_LINK_URI)) return -1; - if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") { + if (Tokenizer_READ(self, 0) == '/' && Tokenizer_READ(self, 1) == '/') { if (Tokenizer_emit_text(self, "//")) return -1; self->head += 2; @@ -881,7 +881,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) buffer = Textbuffer_new(); if (!buffer) return -1; - while ((this = Tokenizer_READ(self, 0)) != *"") { + while ((this = Tokenizer_READ(self, 0))) { i = 0; while (1) { if (!valid[i]) @@ -898,18 +898,18 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) self->head++; } end_of_loop: - if (this != *":") { + if (this != ':') { Textbuffer_dealloc(buffer); Tokenizer_fail_route(self); return 0; } - if (Tokenizer_emit_char(self, *":")) { + if (Tokenizer_emit_char(self, ':')) { Textbuffer_dealloc(buffer); return -1; 
} self->head++; - slashes = (Tokenizer_READ(self, 0) == *"/" && - Tokenizer_READ(self, 1) == *"/"); + slashes = (Tokenizer_READ(self, 0) == '/' && + Tokenizer_READ(self, 1) == '/'); if (slashes) { if (Tokenizer_emit_text(self, "//")) { Textbuffer_dealloc(buffer); @@ -973,8 +973,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Textbuffer_dealloc(scheme_buffer); return -1; } - slashes = (Tokenizer_READ(self, 0) == *"/" && - Tokenizer_READ(self, 1) == *"/"); + slashes = (Tokenizer_READ(self, 0) == '/' && + Tokenizer_READ(self, 1) == '/'); if (!IS_SCHEME(scheme, slashes, 1)) { Py_DECREF(scheme); Textbuffer_dealloc(scheme_buffer); @@ -988,7 +988,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) } if (Tokenizer_emit_textbuffer(self, scheme_buffer, 1)) return -1; - if (Tokenizer_emit_char(self, *":")) + if (Tokenizer_emit_char(self, ':')) return -1; if (slashes) { if (Tokenizer_emit_text(self, "//")) @@ -1014,13 +1014,13 @@ Tokenizer_handle_free_link_text(Tokenizer* self, int* parens, return error; \ } - if (this == *"(" && !(*parens)) { + if (this == '(' && !(*parens)) { *parens = 1; PUSH_TAIL_BUFFER(*tail, -1) } - else if (this == *"," || this == *";" || this == *"\\" || this == *"." || - this == *":" || this == *"!" || this == *"?" || - (!(*parens) && this == *")")) + else if (this == ',' || this == ';' || this == '\\' || this == '.' || + this == ':' || this == '!' || this == '?' 
|| + (!(*parens) && this == ')')) return Textbuffer_write(tail, this); else PUSH_TAIL_BUFFER(*tail, -1) @@ -1037,12 +1037,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) Py_UNICODE after = Tokenizer_READ(self, 2); int ctx = self->topstack->context; - return (this == *"" || this == *"\n" || this == *"[" || this == *"]" || - this == *"<" || this == *">" || (this == *"'" && next == *"'") || - (this == *"|" && ctx & LC_TEMPLATE) || - (this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || - (this == *"}" && next == *"}" && - (ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT)))); + return (!this || this == '\n' || this == '[' || this == ']' || + this == '<' || this == '>' || (this == '\'' && next == '\'') || + (this == '|' && ctx & LC_TEMPLATE) || + (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || + (this == '}' && next == '}' && + (ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT)))); } /* @@ -1061,21 +1061,21 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, if (BAD_ROUTE) return NULL; this = Tokenizer_READ(self, 0); - if (this == *"" || this == *"\n" || this == *" " || this == *"]") + if (!this || this == '\n' || this == ' ' || this == ']') return Tokenizer_fail_route(self); - if (!brackets && this == *"[") + if (!brackets && this == '[') return Tokenizer_fail_route(self); while (1) { this = Tokenizer_READ(self, 0); next = Tokenizer_READ(self, 1); - if (this == *"&") { + if (this == '&') { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_entity(self)) return NULL; } - else if (this == *"<" && next == *"!" - && Tokenizer_READ(self, 2) == *"-" - && Tokenizer_READ(self, 3) == *"-") { + else if (this == '<' && next == '!' 
+ && Tokenizer_READ(self, 2) == '-' + && Tokenizer_READ(self, 3) == '-') { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_comment(self)) return NULL; @@ -1084,16 +1084,16 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, self->head--; return Tokenizer_pop(self); } - else if (this == *"" || this == *"\n") + else if (!this || this == '\n') return Tokenizer_fail_route(self); - else if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) { + else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_template_or_argument(self)) return NULL; } - else if (this == *"]") + else if (this == ']') return Tokenizer_pop(self); - else if (this == *" ") { + else if (this == ' ') { if (brackets) { if (Tokenizer_emit(self, ExternalLinkSeparator)) return NULL; @@ -1102,7 +1102,7 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, self->head++; return Tokenizer_parse(self, 0, 0); } - if (Textbuffer_write(extra, *" ")) + if (Textbuffer_write(extra, ' ')) return NULL; return Tokenizer_pop(self); } @@ -1232,7 +1232,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) self->global |= GL_HEADING; self->head += 1; - while (Tokenizer_READ(self, 0) == *"=") { + while (Tokenizer_READ(self, 0) == '=') { best++; self->head++; } @@ -1242,7 +1242,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) RESET_ROUTE(); self->head = reset + best - 1; for (i = 0; i < best; i++) { - if (Tokenizer_emit_char(self, *"=")) + if (Tokenizer_emit_char(self, '=')) return -1; } self->global ^= GL_HEADING; @@ -1271,7 +1271,7 @@ static int Tokenizer_parse_heading(Tokenizer* self) if (heading->level < best) { diff = best - heading->level; for (i = 0; i < diff; i++) { - if (Tokenizer_emit_char(self, *"=")) { + if (Tokenizer_emit_char(self, '=')) { Py_DECREF(heading->title); free(heading); return -1; @@ -1303,7 +1303,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) self->head 
+= 1; best = 1; - while (Tokenizer_READ(self, 0) == *"=") { + while (Tokenizer_READ(self, 0) == '=') { best++; self->head++; } @@ -1316,7 +1316,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) if (level < best) { diff = best - level; for (i = 0; i < diff; i++) { - if (Tokenizer_emit_char(self, *"=")) + if (Tokenizer_emit_char(self, '=')) return NULL; } } @@ -1324,7 +1324,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) } else { for (i = 0; i < best; i++) { - if (Tokenizer_emit_char(self, *"=")) { + if (Tokenizer_emit_char(self, '=')) { Py_DECREF(after->title); free(after); return NULL; @@ -1372,21 +1372,21 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) return -1; self->head++; this = Tokenizer_READ(self, 0); - if (this == *"") { + if (!this) { Tokenizer_fail_route(self); return 0; } - if (this == *"#") { + if (this == '#') { numeric = 1; if (Tokenizer_emit(self, HTMLEntityNumeric)) return -1; self->head++; this = Tokenizer_READ(self, 0); - if (this == *"") { + if (!this) { Tokenizer_fail_route(self); return 0; } - if (this == *"x" || this == *"X") { + if (this == 'x' || this == 'X') { hexadecimal = 1; kwargs = PyDict_New(); if (!kwargs) @@ -1416,22 +1416,20 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); - if (this == *";") { + if (this == ';') { if (i == 0) FAIL_ROUTE_AND_EXIT() break; } - if (i == 0 && this == *"0") { + if (i == 0 && this == '0') { zeroes++; self->head++; continue; } if (i >= MAX_ENTITY_SIZE) FAIL_ROUTE_AND_EXIT() - for (j = 0; j < NUM_MARKERS; j++) { - if (this == *MARKERS[j]) - FAIL_ROUTE_AND_EXIT() - } + if (is_marker(this)) + FAIL_ROUTE_AND_EXIT() j = 0; while (1) { if (!valid[j]) @@ -1508,7 +1506,7 @@ static int Tokenizer_parse_entity(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; - if (Tokenizer_emit_char(self, *"&")) + if (Tokenizer_emit_char(self, '&')) return -1; return 0; } @@ -1537,14 
+1535,14 @@ static int Tokenizer_parse_comment(Tokenizer* self) return -1; while (1) { this = Tokenizer_READ(self, 0); - if (this == *"") { + if (!this) { comment = Tokenizer_pop(self); Py_XDECREF(comment); self->head = reset; return Tokenizer_emit_text(self, "")) self.assertTrue(code3.matches(("a", "b", ""))) From 066049b46a276e70662b6f4576ffd23df0b1ca4a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 Oct 2013 21:03:55 -0400 Subject: [PATCH 06/39] Update email address. --- LICENSE | 2 +- mwparserfromhell/__init__.py | 4 ++-- mwparserfromhell/definitions.py | 2 +- mwparserfromhell/nodes/__init__.py | 2 +- mwparserfromhell/nodes/argument.py | 2 +- mwparserfromhell/nodes/comment.py | 2 +- mwparserfromhell/nodes/external_link.py | 2 +- mwparserfromhell/nodes/extras/__init__.py | 2 +- mwparserfromhell/nodes/extras/attribute.py | 2 +- mwparserfromhell/nodes/extras/parameter.py | 2 +- mwparserfromhell/nodes/heading.py | 2 +- mwparserfromhell/nodes/html_entity.py | 2 +- mwparserfromhell/nodes/tag.py | 2 +- mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/nodes/text.py | 2 +- mwparserfromhell/nodes/wikilink.py | 2 +- mwparserfromhell/parser/__init__.py | 2 +- mwparserfromhell/parser/builder.py | 2 +- mwparserfromhell/parser/contexts.py | 2 +- mwparserfromhell/parser/tokenizer.c | 2 +- mwparserfromhell/parser/tokenizer.h | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- mwparserfromhell/parser/tokens.py | 2 +- mwparserfromhell/smart_list.py | 2 +- mwparserfromhell/string_mixin.py | 2 +- mwparserfromhell/utils.py | 2 +- mwparserfromhell/wikicode.py | 2 +- setup.py | 4 ++-- tests/_test_tokenizer.py | 2 +- tests/_test_tree_equality.py | 2 +- tests/test_argument.py | 2 +- tests/test_attribute.py | 2 +- tests/test_builder.py | 2 +- tests/test_comment.py | 2 +- tests/test_ctokenizer.py | 2 +- tests/test_docs.py | 2 +- tests/test_external_link.py | 2 +- tests/test_heading.py | 2 +- tests/test_html_entity.py | 2 +- tests/test_parameter.py | 2 +- 
tests/test_parser.py | 2 +- tests/test_pytokenizer.py | 2 +- tests/test_smart_list.py | 2 +- tests/test_string_mixin.py | 2 +- tests/test_tag.py | 2 +- tests/test_template.py | 2 +- tests/test_text.py | 2 +- tests/test_tokens.py | 2 +- tests/test_utils.py | 2 +- tests/test_wikicode.py | 2 +- tests/test_wikilink.py | 2 +- 51 files changed, 53 insertions(+), 53 deletions(-) diff --git a/LICENSE b/LICENSE index 413f1c4..71b7129 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> +Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@gmail.com> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 3c011d0..a9dc2ff 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> +# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@gmail.com> # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -32,7 +32,7 @@ __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.4.dev" -__email__ = "ben.kurtovic@verizon.net" +__email__ = "ben.kurtovic@gmail.com" from . 
import (compat, definitions, nodes, parser, smart_list, string_mixin, utils, wikicode) diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 9449bcb..d0b7759 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index ba97b3f..0b3b326 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index d7db92a..f30cddb 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index e96ce38..4ecc173 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff 
--git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index d74f6b3..1678eb1 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index e860f01..1895f0d 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 8f7f453..6266a7a 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index c1c10a0..dfba277 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation 
files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index f001234..eecc65d 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index b51bd92..781bcfe 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 06f43d0..775abbd 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 7a9779b..e7b4cd3 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/text.py 
b/mwparserfromhell/nodes/text.py index 6fda3da..c87594a 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 527e9bb..6ac092c 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 22c3dc2..81dea9b 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index d31f450..132f5d4 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 33da8f7..dcf276c 100644 --- 
a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 1823006..ac0d863 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index e2ac281..4b28e02 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -1,6 +1,6 @@ /* Tokenizer Header File for MWParserFromHell -Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index eb4c571..35a2b09 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 57308ea..383ddbe 100644 --- a/mwparserfromhell/parser/tokens.py +++ 
b/mwparserfromhell/parser/tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 09b7bbb..416c547 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index c52d4ca..bf68c18 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 758e751..f07101b 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 4a0763d..3b94394 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # 
Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/setup.py b/setup.py index d6e77a1..d545fe4 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -39,7 +39,7 @@ setup( test_suite = "tests", version = __version__, author = "Ben Kurtovic", - author_email = "ben.kurtovic@verizon.net", + author_email = "ben.kurtovic@gmail.com", url = "https://github.com/earwig/mwparserfromhell", description = "MWParserFromHell is a parser for MediaWiki wikicode.", long_description = long_docs, diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index c1d49cb..d3ba2f6 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 3267b45..d6d92f1 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_argument.py b/tests/test_argument.py index 8191804..f09782f 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 
2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_attribute.py b/tests/test_attribute.py index f34c670..83f9a1e 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_builder.py b/tests/test_builder.py index 152ab53..41eca4b 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_comment.py b/tests/test_comment.py index 44225a2..0abb316 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 2374516..21b31f4 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_docs.py 
b/tests/test_docs.py index 6d066bd..4047c82 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 13a82bf..d91ffc7 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_heading.py b/tests/test_heading.py index 7a65872..5b56365 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index d38e5ec..bcc9fb5 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 4786e12..a43ffe6 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission 
is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_parser.py b/tests/test_parser.py index 8760c0e..5c50b01 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 0211e7f..b769c97 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 25df555..16cbac0 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 5ee857c..ec0c97c 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_tag.py b/tests/test_tag.py index 5ef92a5..6f021ba 100644 --- 
a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_template.py b/tests/test_template.py index 2294baf..d421ed7 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_text.py b/tests/test_text.py index 35ac340..ad174f6 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 2048bb9..677c973 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_utils.py b/tests/test_utils.py index 80a0e5e..7d90813 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the 
"Software"), to deal diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 26168a0..5824c15 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 7851032..1f5124f 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal From 79bf42df1c78382e38f0d7181fe55498786f9cfa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 27 Oct 2013 17:20:26 -0400 Subject: [PATCH 07/39] Wikicode.get_sections() now returns sections in the correct order. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by Σ. --- CHANGELOG | 1 + docs/changelog.rst | 1 + mwparserfromhell/wikicode.py | 47 +++++++++++++++++++++----------------------- tests/test_wikicode.py | 17 ++++++++++------ 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index fc6cd3c..5e19dc5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ v0.4 (unreleased): is a breaking change if you rely on the default behavior. - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. +- Wikicode.get_sections() now returns sections in the correct order. - Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects instead of just a single string or Wikicode. 
- C code cleanup and speed improvements. diff --git a/docs/changelog.rst b/docs/changelog.rst index c7c5c56..e44d17b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -14,6 +14,7 @@ Unreleased - The *matches* argument of :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods now accepts a function (taking one argument, a :py:class:`.Node`, and returning a bool) in addition to a regex. +- :py:meth:`.Wikicode.get_sections` now returns sections in the correct order. - :py:meth:`.Wikicode.matches` now accepts a tuple or list of strings/:py:class:`.Wikicode` objects instead of just a single string or :py:class:`.Wikicode`. diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 3b94394..19ad4f4 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -434,34 +434,31 @@ class Wikicode(StringMixIn): """ if matches: matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter_headings() - filtered = self.filter_headings(matches=matches, flags=flags) + headings = self.filter_headings(recursive=False, matches=matches, + flags=flags) if levels: - filtered = [head for head in filtered if head.level in levels] + headings = [head for head in headings if head.level in levels] - if matches or include_lead is False or (not include_lead and levels): - buffers = [] - else: - buffers = [(maxsize, 0)] sections = [] - i = 0 - while i < len(self.nodes): - if self.nodes[i] in headings: - this = self.nodes[i].level - for (level, start) in buffers: - if this <= level: - sections.append(Wikicode(self.nodes[start:i])) - buffers = [buf for buf in buffers if buf[0] < this] - if self.nodes[i] in filtered: - if not include_headings: - i += 1 - if i >= len(self.nodes): - break - buffers.append((this, i)) - i += 1 - for (level, start) in buffers: - if start != i: - sections.append(Wikicode(self.nodes[start:i])) + if include_lead or not (include_lead is not None or matches or levels): + iterator = 
self.ifilter_headings(recursive=False) + try: + first = self.index(next(iterator)) + sections.append(Wikicode(self.nodes[:first])) + except StopIteration: # No headings in page + sections.append(Wikicode(self.nodes[:])) + + for heading in headings: + start = self.index(heading) + i = start + 1 + if not include_headings: + start += 1 + while i < len(self.nodes): + node = self.nodes[i] + if isinstance(node, Heading) and node.level <= heading.level: + break + i += 1 + sections.append(Wikicode(self.nodes[start:i])) return sections def strip_code(self, normalize=True, collapse=True): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 5824c15..3e12cac 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -351,12 +351,12 @@ class TestWikicode(TreeEqualityTestCase): p4_III = "== Section III ==\n" + p4_IIIA page4 = parse(p4_lead + p4_I + p4_II + p4_III) - self.assertEqual([], page1.get_sections()) + self.assertEqual([""], page1.get_sections()) self.assertEqual(["", "==Heading=="], page2.get_sections()) self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], page3.get_sections()) - self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II, - p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II, + p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1], page4.get_sections()) self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4])) @@ -370,16 +370,16 @@ class TestWikicode(TreeEqualityTestCase): page3.get_sections(include_lead=False)) self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4])) - self.assertEqual([""], page2.get_sections(include_headings=False)) + self.assertEqual(["", ""], page2.get_sections(include_headings=False)) self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n", "\nEven more text.\n" + p4_IIIA2ai1], page4.get_sections(levels=[4], include_headings=False)) self.assertEqual([], 
page4.get_sections(matches=r"body")) - self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1], + self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1], page4.get_sections(matches=r"Section\sI[.\s].*?")) - self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1], + self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1], page4.get_sections(matches=r".*?a.*?")) self.assertEqual([p4_IIIA1a, p4_IIIA2ai1], page4.get_sections(matches=r".*?a.*?", flags=re.U)) @@ -387,6 +387,11 @@ class TestWikicode(TreeEqualityTestCase): page4.get_sections(matches=r".*?a.*?", flags=re.U, include_headings=False)) + sections = page2.get_sections(include_headings=False) + sections[0].append("Lead!\n") + sections[1].append("\nFirst section!") + self.assertEqual("Lead!\n==Heading==\nFirst section!", page2) + page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz") section = page5.get_sections(matches="Foo")[0] section.replace("\nBar\n", "\nBarf ") From 8df596ba088d95b20b08d95e7c05ea0cc099d99c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 27 Oct 2013 20:22:17 -0400 Subject: [PATCH 08/39] Re-added 'flat' argument to Wikicode.get_sections(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Requested by Σ. --- CHANGELOG | 1 + docs/changelog.rst | 1 + mwparserfromhell/wikicode.py | 21 +++++++++++---------- tests/test_wikicode.py | 8 ++++++++ 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5e19dc5..30ddb9e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ v0.4 (unreleased): is a breaking change if you rely on the default behavior. - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. +- Re-added 'flat' argument to Wikicode.get_sections(). - Wikicode.get_sections() now returns sections in the correct order. 
- Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects instead of just a single string or Wikicode. diff --git a/docs/changelog.rst b/docs/changelog.rst index e44d17b..83f4b88 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -14,6 +14,7 @@ Unreleased - The *matches* argument of :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods now accepts a function (taking one argument, a :py:class:`.Node`, and returning a bool) in addition to a regex. +- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`. - :py:meth:`.Wikicode.get_sections` now returns sections in the correct order. - :py:meth:`.Wikicode.matches` now accepts a tuple or list of strings/:py:class:`.Wikicode` objects instead of just a single string or diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 19ad4f4..3bf458e 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -409,7 +409,7 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def get_sections(self, levels=None, matches=None, flags=FLAGS, + def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, include_lead=None, include_headings=True): """Return a list of sections within the page. @@ -417,13 +417,13 @@ class Wikicode(StringMixIn): node list (implemented using :py:class:`~.SmartList`) so that changes to sections are reflected in the parent Wikicode object. - Each section contains all of its subsections. If *levels* is given, it - should be a iterable of integers; only sections whose heading levels - are within it will be returned. If *matches* is given, it should be a - regex to be matched against the titles of section headings; only - sections whose headings match the regex will be included. *flags* can - be used to override the default regex flags (see :py:meth:`ifilter`) if - *matches* is used. + Each section contains all of its subsections, unless *flat* is + ``True``. 
If *levels* is given, it should be a iterable of integers; + only sections whose heading levels are within it will be returned. If + *matches* is given, it should be a regex to be matched against the + titles of section headings; only sections whose headings match the + regex will be included. *flags* can be used to override the default + regex flags (see :py:meth:`ifilter`) if *matches* is used. If *include_lead* is ``True``, the first, lead section (without a heading) will be included in the list; ``False`` will not include it; @@ -455,8 +455,9 @@ class Wikicode(StringMixIn): start += 1 while i < len(self.nodes): node = self.nodes[i] - if isinstance(node, Heading) and node.level <= heading.level: - break + if isinstance(node, Heading): + if flat or node.level <= heading.level: + break i += 1 sections.append(Wikicode(self.nodes[start:i])) return sections diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 3e12cac..c974be4 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -362,14 +362,22 @@ class TestWikicode(TreeEqualityTestCase): self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4])) self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"], page3.get_sections(levels=(2, 3))) + self.assertEqual(["===Heading===\nFoo bar baz\n"], + page3.get_sections(levels=(2, 3), flat=True)) self.assertEqual([], page3.get_sections(levels=[0])) self.assertEqual(["", "====Gnidaeh====\n"], page3.get_sections(levels=[4], include_lead=True)) self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], page3.get_sections(include_lead=False)) + self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], + page3.get_sections(flat=True, include_lead=False)) self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4])) + self.assertEqual([p4_IA, p4_IB, p4_IIIA], page4.get_sections(levels=[3])) + self.assertEqual([p4_IA, "=== Section I.B ===\n", + "=== Section III.A ===\nText.\n"], + 
page4.get_sections(levels=[3], flat=True)) self.assertEqual(["", ""], page2.get_sections(include_headings=False)) self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n", "\nEven more text.\n" + p4_IIIA2ai1], From 1946cf621dbc6d41ac280d18daf04979e567a698 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 27 Oct 2013 23:03:23 -0400 Subject: [PATCH 09/39] Add a temporary skip_style_tags until we resolve some issues. --- CHANGELOG | 3 +++ docs/changelog.rst | 4 ++++ mwparserfromhell/parser/__init__.py | 10 +++++++--- mwparserfromhell/parser/tokenizer.c | 10 ++++++---- mwparserfromhell/parser/tokenizer.h | 1 + mwparserfromhell/parser/tokenizer.py | 5 +++-- tests/_test_tree_equality.py | 2 +- tests/test_parser.py | 25 ++++++++++++++++++++++++- 8 files changed, 49 insertions(+), 11 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 30ddb9e..558e5cb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,6 +9,9 @@ v0.4 (unreleased): - Wikicode.get_sections() now returns sections in the correct order. - Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects instead of just a single string or Wikicode. +- Given the frequency of issues with the (admittedly insufficient) tag parser, + there's a temporary skip_style_tags argument to parse() that ignores '' and + ''' until these issues are corrected. - C code cleanup and speed improvements. v0.3.2 (released September 1, 2013): diff --git a/docs/changelog.rst b/docs/changelog.rst index 83f4b88..07b02da 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -19,6 +19,10 @@ Unreleased - :py:meth:`.Wikicode.matches` now accepts a tuple or list of strings/:py:class:`.Wikicode` objects instead of just a single string or :py:class:`.Wikicode`. +- Given the frequency of issues with the (admittedly insufficient) tag parser, + there's a temporary *skip_style_tags* argument to + :py:meth:`~mwparserfromhell.parse` that ignores ``''`` and ``'''`` until + these issues are corrected. 
- C code cleanup and speed improvements. v0.3.2 diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 81dea9b..6cbfa3a 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -53,8 +53,12 @@ class Parser(object): self._tokenizer = Tokenizer() self._builder = Builder() - def parse(self, text, context=0): - """Parse *text*, returning a :py:class:`~.Wikicode` object tree.""" - tokens = self._tokenizer.tokenize(text, context) + def parse(self, text, context=0, skip_style_tags=False): + """Parse *text*, returning a :py:class:`~.Wikicode` object tree. + + If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be + parsed, but instead be treated as plain text. + """ + tokens = self._tokenizer.tokenize(text, context, skip_style_tags) code = self._builder.build(tokens) return code diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index ac0d863..c37d8dc 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2640,7 +2640,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) } else if (this == '>' && this_context & LC_TAG_CLOSE) return Tokenizer_handle_tag_close_close(self); - else if (this == next && next == '\'') { + else if (this == next && next == '\'' && !self->skip_style_tags) { temp = Tokenizer_parse_style(self); if (temp != Py_None) return temp; @@ -2675,9 +2675,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) { PyObject *text, *temp; - int context = 0; + int context = 0, skip_style_tags = 0; - if (PyArg_ParseTuple(args, "U|i", &text, &context)) { + if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { Py_XDECREF(self->text); self->text = PySequence_Fast(text, "expected a sequence"); } @@ -2686,7 +2686,8 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, 
PyObject* args) Py_ssize_t size; /* Failed to parse a Unicode object; try a string instead. */ PyErr_Clear(); - if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context)) + if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context, + &skip_style_tags)) return NULL; temp = PyUnicode_FromStringAndSize(encoded, size); if (!text) @@ -2698,6 +2699,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) } self->head = self->global = self->depth = self->cycles = 0; self->length = PyList_GET_SIZE(self->text); + self->skip_style_tags = skip_style_tags; return Tokenizer_parse(self, context, 1); } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 4b28e02..ef5acd6 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -223,6 +223,7 @@ typedef struct { int global; /* global context */ int depth; /* stack recursion depth */ int cycles; /* total number of stack recursions */ + int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ } Tokenizer; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 35a2b09..8d12b62 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1124,7 +1124,7 @@ class Tokenizer(object): self._emit_text("<") elif this == ">" and self._context & contexts.TAG_CLOSE: return self._handle_tag_close_close() - elif this == next == "'": + elif this == next == "'" and not self._skip_style_tags: result = self._parse_style() if result is not None: return result @@ -1141,8 +1141,9 @@ class Tokenizer(object): self._emit_text(this) self._head += 1 - def tokenize(self, text, context=0): + def tokenize(self, text, context=0, skip_style_tags=False): """Build a list of tokens from a string of wikicode and return it.""" + self._skip_style_tags = skip_style_tags split = self.regex.split(text) self._text = [segment for segment in split if segment] self._head = 
self._global = self._depth = self._cycles = 0 diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index d6d92f1..38350d8 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -106,7 +106,7 @@ class TreeEqualityTestCase(TestCase): self.assertEqual(exp_attr.pad_first, act_attr.pad_first) self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) - self.assertIs(expected.wiki_markup, actual.wiki_markup) + self.assertEqual(expected.wiki_markup, actual.wiki_markup) self.assertIs(expected.self_closing, actual.self_closing) self.assertIs(expected.invalid, actual.invalid) self.assertIs(expected.implicit, actual.implicit) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5c50b01..672cbff 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,7 +24,7 @@ from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext @@ -35,10 +35,12 @@ class TestParser(TreeEqualityTestCase): def test_use_c(self): """make sure the correct tokenizer is used""" + restore = parser.use_c if parser.use_c: self.assertTrue(parser.Parser()._tokenizer.USES_C) parser.use_c = False self.assertFalse(parser.Parser()._tokenizer.USES_C) + parser.use_c = restore def test_parsing(self): """integration test for parsing overall""" @@ -62,5 +64,26 @@ class TestParser(TreeEqualityTestCase): actual = parser.Parser().parse(text) self.assertWikicodeEqual(expected, actual) + def test_skip_style_tags(self): + """test Parser.parse(skip_style_tags=True)""" + def test(): + with_style = parser.Parser().parse(text, skip_style_tags=False) + without_style = parser.Parser().parse(text, skip_style_tags=True) + 
self.assertWikicodeEqual(a, with_style) + self.assertWikicodeEqual(b, without_style) + + text = "This is an example with ''italics''!" + a = wrap([Text("This is an example with "), + Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), + Text("!")]) + b = wraptext("This is an example with ''italics''!") + + restore = parser.use_c + if parser.use_c: + test() + parser.use_c = False + test() + parser.use_c = restore + if __name__ == "__main__": unittest.main(verbosity=2) From 39c07561305ec24bd5645efde928249067ef551d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 10 Nov 2013 03:35:50 -0500 Subject: [PATCH 10/39] Make StringMixIn a lot simpler thanks to __getattr__. --- mwparserfromhell/string_mixin.py | 276 ++------------------------------------- mwparserfromhell/wikicode.py | 2 +- tests/test_string_mixin.py | 16 +-- 3 files changed, 17 insertions(+), 277 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index bf68c18..856035b 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -26,20 +26,12 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. """ from __future__ import unicode_literals +from sys import getdefaultencoding -from .compat import py3k, py32, str +from .compat import bytes, py3k, str __all__ = ["StringMixIn"] -def inheritdoc(method): - """Set __doc__ of *method* to __doc__ of *method* in its parent class. - - Since this is used on :py:class:`~.StringMixIn`, the "parent class" used is - ``str``. This function can be used as a decorator. - """ - method.__doc__ = getattr(str, method.__name__).__doc__ - return method - class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner. 
@@ -55,10 +47,10 @@ class StringMixIn(object): return self.__unicode__() def __bytes__(self): - return self.__unicode__().encode("utf8") + return bytes(self.__unicode__(), getdefaultencoding()) else: def __str__(self): - return self.__unicode__().encode("utf8") + return bytes(self.__unicode__()) def __unicode__(self): raise NotImplementedError() @@ -67,33 +59,21 @@ class StringMixIn(object): return repr(self.__unicode__()) def __lt__(self, other): - if isinstance(other, StringMixIn): - return self.__unicode__() < other.__unicode__() return self.__unicode__() < other def __le__(self, other): - if isinstance(other, StringMixIn): - return self.__unicode__() <= other.__unicode__() return self.__unicode__() <= other def __eq__(self, other): - if isinstance(other, StringMixIn): - return self.__unicode__() == other.__unicode__() return self.__unicode__() == other def __ne__(self, other): - if isinstance(other, StringMixIn): - return self.__unicode__() != other.__unicode__() return self.__unicode__() != other def __gt__(self, other): - if isinstance(other, StringMixIn): - return self.__unicode__() > other.__unicode__() return self.__unicode__() > other def __ge__(self, other): - if isinstance(other, StringMixIn): - return self.__unicode__() >= other.__unicode__() return self.__unicode__() >= other if py3k: @@ -117,250 +97,10 @@ class StringMixIn(object): return reversed(self.__unicode__()) def __contains__(self, item): - if isinstance(item, StringMixIn): - return str(item) in self.__unicode__() - return item in self.__unicode__() - - @inheritdoc - def capitalize(self): - return self.__unicode__().capitalize() - - if py3k and not py32: - @inheritdoc - def casefold(self): - return self.__unicode__().casefold() - - @inheritdoc - def center(self, width, fillchar=None): - if fillchar is None: - return self.__unicode__().center(width) - return self.__unicode__().center(width, fillchar) - - @inheritdoc - def count(self, sub, start=None, end=None): - return 
self.__unicode__().count(sub, start, end) + return str(item) in self.__unicode__() - if not py3k: - @inheritdoc - def decode(self, encoding=None, errors=None): - kwargs = {} - if encoding is not None: - kwargs["encoding"] = encoding - if errors is not None: - kwargs["errors"] = errors - return self.__unicode__().decode(**kwargs) - - @inheritdoc - def encode(self, encoding=None, errors=None): - kwargs = {} - if encoding is not None: - kwargs["encoding"] = encoding - if errors is not None: - kwargs["errors"] = errors - return self.__unicode__().encode(**kwargs) - - @inheritdoc - def endswith(self, prefix, start=None, end=None): - return self.__unicode__().endswith(prefix, start, end) - - @inheritdoc - def expandtabs(self, tabsize=None): - if tabsize is None: - return self.__unicode__().expandtabs() - return self.__unicode__().expandtabs(tabsize) - - @inheritdoc - def find(self, sub, start=None, end=None): - return self.__unicode__().find(sub, start, end) - - @inheritdoc - def format(self, *args, **kwargs): - return self.__unicode__().format(*args, **kwargs) + def __getattr__(self, attr): + return getattr(self.__unicode__(), attr) if py3k: - @inheritdoc - def format_map(self, mapping): - return self.__unicode__().format_map(mapping) - - @inheritdoc - def index(self, sub, start=None, end=None): - return self.__unicode__().index(sub, start, end) - - @inheritdoc - def isalnum(self): - return self.__unicode__().isalnum() - - @inheritdoc - def isalpha(self): - return self.__unicode__().isalpha() - - @inheritdoc - def isdecimal(self): - return self.__unicode__().isdecimal() - - @inheritdoc - def isdigit(self): - return self.__unicode__().isdigit() - - if py3k: - @inheritdoc - def isidentifier(self): - return self.__unicode__().isidentifier() - - @inheritdoc - def islower(self): - return self.__unicode__().islower() - - @inheritdoc - def isnumeric(self): - return self.__unicode__().isnumeric() - - if py3k: - @inheritdoc - def isprintable(self): - return 
self.__unicode__().isprintable() - - @inheritdoc - def isspace(self): - return self.__unicode__().isspace() - - @inheritdoc - def istitle(self): - return self.__unicode__().istitle() - - @inheritdoc - def isupper(self): - return self.__unicode__().isupper() - - @inheritdoc - def join(self, iterable): - return self.__unicode__().join(iterable) - - @inheritdoc - def ljust(self, width, fillchar=None): - if fillchar is None: - return self.__unicode__().ljust(width) - return self.__unicode__().ljust(width, fillchar) - - @inheritdoc - def lower(self): - return self.__unicode__().lower() - - @inheritdoc - def lstrip(self, chars=None): - return self.__unicode__().lstrip(chars) - - if py3k: - @staticmethod - @inheritdoc - def maketrans(x, y=None, z=None): - if z is None: - if y is None: - return str.maketrans(x) - return str.maketrans(x, y) - return str.maketrans(x, y, z) - - @inheritdoc - def partition(self, sep): - return self.__unicode__().partition(sep) - - @inheritdoc - def replace(self, old, new, count=None): - if count is None: - return self.__unicode__().replace(old, new) - return self.__unicode__().replace(old, new, count) - - @inheritdoc - def rfind(self, sub, start=None, end=None): - return self.__unicode__().rfind(sub, start, end) - - @inheritdoc - def rindex(self, sub, start=None, end=None): - return self.__unicode__().rindex(sub, start, end) - - @inheritdoc - def rjust(self, width, fillchar=None): - if fillchar is None: - return self.__unicode__().rjust(width) - return self.__unicode__().rjust(width, fillchar) - - @inheritdoc - def rpartition(self, sep): - return self.__unicode__().rpartition(sep) - - if py3k and not py32: - @inheritdoc - def rsplit(self, sep=None, maxsplit=None): - kwargs = {} - if sep is not None: - kwargs["sep"] = sep - if maxsplit is not None: - kwargs["maxsplit"] = maxsplit - return self.__unicode__().rsplit(**kwargs) - else: - @inheritdoc - def rsplit(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return 
self.__unicode__().rsplit() - return self.__unicode__().rsplit(sep) - return self.__unicode__().rsplit(sep, maxsplit) - - @inheritdoc - def rstrip(self, chars=None): - return self.__unicode__().rstrip(chars) - - if py3k and not py32: - @inheritdoc - def split(self, sep=None, maxsplit=None): - kwargs = {} - if sep is not None: - kwargs["sep"] = sep - if maxsplit is not None: - kwargs["maxsplit"] = maxsplit - return self.__unicode__().split(**kwargs) - else: - @inheritdoc - def split(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().split() - return self.__unicode__().split(sep) - return self.__unicode__().split(sep, maxsplit) - - @inheritdoc - def splitlines(self, keepends=None): - if keepends is None: - return self.__unicode__().splitlines() - return self.__unicode__().splitlines(keepends) - - @inheritdoc - def startswith(self, prefix, start=None, end=None): - return self.__unicode__().startswith(prefix, start, end) - - @inheritdoc - def strip(self, chars=None): - return self.__unicode__().strip(chars) - - @inheritdoc - def swapcase(self): - return self.__unicode__().swapcase() - - @inheritdoc - def title(self): - return self.__unicode__().title() - - @inheritdoc - def translate(self, table): - return self.__unicode__().translate(table) - - @inheritdoc - def upper(self): - return self.__unicode__().upper() - - @inheritdoc - def zfill(self, width): - return self.__unicode__().zfill(width) - - -del inheritdoc + maketrans = str.maketrans # Static method can't rely on __getattr__ diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 3bf458e..148726c 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals import re -from .compat import maxsize, py3k, str +from .compat import py3k, str from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from .string_mixin 
import StringMixIn diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index ec0c97c..ca627ea 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -59,8 +59,8 @@ class TestStringMixIn(unittest.TestCase): else: methods.append("decode") for meth in methods: - expected = getattr(str, meth).__doc__ - actual = getattr(StringMixIn, meth).__doc__ + expected = getattr("foo", meth).__doc__ + actual = getattr(_FakeString("foo"), meth).__doc__ self.assertEqual(expected, actual) def test_types(self): @@ -109,12 +109,12 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str1 < str4) self.assertTrue(str1 <= str4) - self.assertTrue(str1 > str5) - self.assertTrue(str1 >= str5) - self.assertFalse(str1 == str5) - self.assertTrue(str1 != str5) - self.assertFalse(str1 < str5) - self.assertFalse(str1 <= str5) + self.assertFalse(str5 > str1) + self.assertFalse(str5 >= str1) + self.assertFalse(str5 == str1) + self.assertTrue(str5 != str1) + self.assertTrue(str5 < str1) + self.assertTrue(str5 <= str1) def test_other_magics(self): """test other magically implemented features, like len() and iter()""" From c8485bf56b4cdc1390c9884cd9ea21901c347b2a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 10 Nov 2013 03:39:40 -0500 Subject: [PATCH 11/39] Python 3.2 is weird. 
--- tests/test_string_mixin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index ca627ea..ad12df7 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -376,7 +376,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) - if py3k: + if py3k and not py32: actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) @@ -394,7 +394,7 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) - if py3k: + if py3k and not py32: actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) From 572d7c301416b97fb8993cf76d60eeb1f838c6b2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 26 Nov 2013 00:44:19 -0500 Subject: [PATCH 12/39] Refactor out part of _do_search; some efficiency. --- mwparserfromhell/wikicode.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 148726c..2444cfa 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -92,6 +92,23 @@ class Wikicode(StringMixIn): return False return obj in nodes + def _prepare_search(self, obj): + """Prepare a new search by calculating the exact parameters. + + *obj*, which may be anything passable to :py:func:`.parse_anything`, is + converted to either a single :py:class:`.Node` or a + :py:class:`.Wikicode` of multiple nodes. *literal* is a boolean; + ``True`` if we are searching for an exact match with ``is`` or + ``False`` if we are searching for equality with ``==``. 
+ """ + literal = isinstance(obj, (Node, Wikicode)) + obj = parse_anything(obj) + if not obj or obj not in self: + raise ValueError(obj) + if len(obj.nodes) == 1: + obj = obj.get(0) + return obj, literal + def _do_search(self, obj, recursive, context=None, literal=None): """Return some info about the location of *obj* within *context*. @@ -105,14 +122,8 @@ class Wikicode(StringMixIn): """ if not context: context = self - literal = isinstance(obj, (Node, Wikicode)) - obj = parse_anything(obj) - if not obj or obj not in self: - raise ValueError(obj) - if len(obj.nodes) == 1: - obj = obj.get(0) - - compare = lambda a, b: (a is b) if literal else (a == b) + obj, literal = self._prepare_search(obj) + compare = (lambda a, b: a is b) if literal else (lambda a, b: a == b) results = [] i = 0 while i < len(context.nodes): @@ -127,7 +138,7 @@ class Wikicode(StringMixIn): nodes = list(context.nodes[i:i + len(obj.nodes)]) results.append((Wikicode, context, nodes)) i += len(obj.nodes) - 1 - elif recursive: + elif recursive and not isinstance(node, Text) and obj in node: contexts = node.__iternodes__(self._get_all_nodes) processed = [] for code in (ctx for ctx, child in contexts): From 99d433c2d55bbf5d12d9c76ae324b65de825d17e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Nov 2013 21:07:42 -0500 Subject: [PATCH 13/39] Change protocol for Node iteration; rename __iternodes__ to __children__. 
--- mwparserfromhell/nodes/__init__.py | 20 ++++++++++---------- mwparserfromhell/nodes/argument.py | 9 +++------ mwparserfromhell/nodes/external_link.py | 9 +++------ mwparserfromhell/nodes/heading.py | 6 ++---- mwparserfromhell/nodes/tag.py | 20 +++++++------------- mwparserfromhell/nodes/template.py | 12 ++++-------- mwparserfromhell/nodes/wikilink.py | 9 +++------ 7 files changed, 32 insertions(+), 53 deletions(-) diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 0b3b326..91afb23 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -42,21 +42,21 @@ class Node(StringMixIn): :py:meth:`__unicode__` must be overridden. It should return a ``unicode`` or (``str`` in py3k) representation of the node. If the node contains - :py:class:`~.Wikicode` objects inside of it, :py:meth:`__iternodes__` - should be overridden to yield tuples of (``wikicode``, - ``node_in_wikicode``) for each node in each wikicode, as well as the node - itself (``None``, ``self``). If the node is printable, :py:meth:`__strip__` - should be overridden to return the printable version of the node - it does - not have to be a string, but something that can be converted to a string - with ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build - a nice tree representation of the node, if desired, for + :py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__` + should be a generator that iterates over them. If the node is printable + (shown when the page is rendered), :py:meth:`__strip__` should return its + printable version, stripping out any formatting marks. It does not have to + return a string, but something that can be converted to a string with + ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a + nice tree representation of the node, if desired, for :py:meth:`~.Wikicode.get_tree`. 
""" def __unicode__(self): raise NotImplementedError() - def __iternodes__(self, getter): - yield None, self + def __children__(self): + return # Funny generator-that-yields-nothing syntax + yield def __strip__(self, normalize, collapse): return None diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index f30cddb..d28d979 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -42,13 +42,10 @@ class Argument(Node): return start + "|" + str(self.default) + "}}}" return start + "}}}" - def __iternodes__(self, getter): - yield None, self - for child in getter(self.name): - yield self.name, child + def __children__(self): + yield self.name if self.default is not None: - for child in getter(self.default): - yield self.default, child + yield self.default def __strip__(self, normalize, collapse): if self.default is not None: diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 1678eb1..89eab1f 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -44,13 +44,10 @@ class ExternalLink(Node): return "[" + str(self.url) + "]" return str(self.url) - def __iternodes__(self, getter): - yield None, self - for child in getter(self.url): - yield self.url, child + def __children__(self): + yield self.url if self.title is not None: - for child in getter(self.title): - yield self.title, child + yield self.title def __strip__(self, normalize, collapse): if self.brackets: diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index eecc65d..e3a0ae5 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -39,10 +39,8 @@ class Heading(Node): def __unicode__(self): return ("=" * self.level) + str(self.title) + ("=" * self.level) - def __iternodes__(self, getter): - yield None, self - for child in getter(self.title): - yield self.title, child + def 
__children__(self): + yield self.title def __strip__(self, normalize, collapse): return self.title.strip_code(normalize, collapse) diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 775abbd..6869c72 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -70,23 +70,17 @@ class Tag(Node): result += "" return result - def __iternodes__(self, getter): - yield None, self + def __children__(self): if not self.wiki_markup: - for child in getter(self.tag): - yield self.tag, child + yield self.tag for attr in self.attributes: - for child in getter(attr.name): - yield attr.name, child - if attr.value: - for child in getter(attr.value): - yield attr.value, child + yield attr.name + if attr.value is not None: + yield attr.value if self.contents: - for child in getter(self.contents): - yield self.contents, child + yield self.contents if not self.self_closing and not self.wiki_markup and self.closing_tag: - for child in getter(self.closing_tag): - yield self.closing_tag, child + yield self.closing_tag def __strip__(self, normalize, collapse): if self.contents and is_visible(self.tag): diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index e7b4cd3..c9f99bf 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -51,16 +51,12 @@ class Template(Node): else: return "{{" + str(self.name) + "}}" - def __iternodes__(self, getter): - yield None, self - for child in getter(self.name): - yield self.name, child + def __children__(self): + yield self.name for param in self.params: if param.showkey: - for child in getter(param.name): - yield param.name, child - for child in getter(param.value): - yield param.value, child + yield param.name + yield param.value def __showtree__(self, write, get, mark): write("{{") diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 6ac092c..f730697 100644 --- a/mwparserfromhell/nodes/wikilink.py 
+++ b/mwparserfromhell/nodes/wikilink.py @@ -41,13 +41,10 @@ class Wikilink(Node): return "[[" + str(self.title) + "|" + str(self.text) + "]]" return "[[" + str(self.title) + "]]" - def __iternodes__(self, getter): - yield None, self - for child in getter(self.title): - yield self.title, child + def __children__(self): + yield self.title if self.text is not None: - for child in getter(self.text): - yield self.text, child + yield self.text def __strip__(self, normalize, collapse): if self.text is not None: From 44484daef3b158a43507fd526ec9179520039ff4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 29 Nov 2013 14:26:41 -0500 Subject: [PATCH 14/39] Updating tests for new __children__ protocol. --- tests/_test_tree_equality.py | 9 --------- tests/test_argument.py | 25 ++++++++++------------- tests/test_comment.py | 7 +++---- tests/test_external_link.py | 27 +++++++++++-------------- tests/test_heading.py | 15 ++++++-------- tests/test_html_entity.py | 7 +++---- tests/test_tag.py | 47 +++++++++++++++++++------------------------- tests/test_template.py | 34 +++++++++++++------------------- tests/test_text.py | 7 +++---- tests/test_wikilink.py | 25 ++++++++++------------- 10 files changed, 80 insertions(+), 123 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 38350d8..bd1f08f 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -32,15 +32,6 @@ from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) wraptext = lambda *args: wrap([Text(t) for t in args]) -def getnodes(code): - """Iterate over all child nodes of a given parent node. - - Imitates Wikicode._get_all_nodes(). - """ - for node in code.nodes: - for context, child in node.__iternodes__(getnodes): - yield child - class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index f09782f..df6838d 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" @@ -38,20 +38,15 @@ class TestArgument(TreeEqualityTestCase): node2 = Argument(wraptext("foo"), wraptext("bar")) self.assertEqual("{{{foo|bar}}}", str(node2)) - def test_iternodes(self): - """test Argument.__iternodes__()""" - node1n1 = Text("foobar") - node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") - node1 = Argument(wrap([node1n1])) - node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3])) - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((node1.name, node1n1), next(gen1)) - self.assertEqual((node2.name, node2n1), next(gen2)) - self.assertEqual((node2.default, node2n2), next(gen2)) - self.assertEqual((node2.default, node2n3), next(gen2)) + def test_children(self): + """test Argument.__children__()""" + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wrap([Text("bar"), Text("baz")])) + gen1 = node1.__children__() + gen2 = node2.__children__() + self.assertIs(node1.name, gen1) + self.assertIs(node2.name, gen2) + self.assertIs(node2.default, gen2) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) diff --git a/tests/test_comment.py b/tests/test_comment.py index 0abb316..bea39d8 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -36,11 +36,10 @@ class TestComment(TreeEqualityTestCase): node = Comment("foobar") self.assertEqual("", 
str(node)) - def test_iternodes(self): - """test Comment.__iternodes__()""" + def test_children(self): + """test Comment.__children__()""" node = Comment("foobar") - gen = node.__iternodes__(None) - self.assertEqual((None, node), next(gen)) + gen = node.__children__() self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_external_link.py b/tests/test_external_link.py index d91ffc7..7f5f042 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestExternalLink(TreeEqualityTestCase): """Test cases for the ExternalLink node.""" @@ -43,21 +43,16 @@ class TestExternalLink(TreeEqualityTestCase): wraptext("Example Web Page")) self.assertEqual("[http://example.com/ Example Web Page]", str(node4)) - def test_iternodes(self): - """test ExternalLink.__iternodes__()""" - node1n1 = Text("http://example.com/") - node2n1 = Text("http://example.com/") - node2n2, node2n3 = Text("Example"), Text("Page") - node1 = ExternalLink(wrap([node1n1]), brackets=False) - node2 = ExternalLink(wrap([node2n1]), wrap([node2n2, node2n3])) - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((node1.url, node1n1), next(gen1)) - self.assertEqual((node2.url, node2n1), next(gen2)) - self.assertEqual((node2.title, node2n2), next(gen2)) - self.assertEqual((node2.title, node2n3), next(gen2)) + def test_children(self): + """test ExternalLink.__children__()""" + node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) + node2 = ExternalLink(wraptext("http://example.com/"), + wrap([Text("Example"), Text("Page")])) + gen1 = 
node1.__children__() + gen2 = node2.__children__() + self.assertEqual(node1.url, next(gen1)) + self.assertEqual(node2.url, next(gen2)) + self.assertEqual(node2.title, next(gen2)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) diff --git a/tests/test_heading.py b/tests/test_heading.py index 5b56365..2fe9ffe 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" @@ -38,14 +38,11 @@ class TestHeading(TreeEqualityTestCase): node2 = Heading(wraptext(" zzz "), 5) self.assertEqual("===== zzz =====", str(node2)) - def test_iternodes(self): - """test Heading.__iternodes__()""" - text1, text2 = Text("foo"), Text("bar") - node = Heading(wrap([text1, text2]), 3) - gen = node.__iternodes__(getnodes) - self.assertEqual((None, node), next(gen)) - self.assertEqual((node.title, text1), next(gen)) - self.assertEqual((node.title, text2), next(gen)) + def test_children(self): + """test Heading.__children__()""" + node = Heading(wrap([Text("foo"), Text("bar")]), 3) + gen = node.__children__() + self.assertEqual(node.title, next(gen)) self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index bcc9fb5..60f4c38 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -42,11 +42,10 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("k", str(node3)) self.assertEqual("l", str(node4)) - def test_iternodes(self): - """test HTMLEntity.__iternodes__()""" + def test_children(self): + """test HTMLEntity.__children__()""" node = HTMLEntity("nbsp", named=True, hexadecimal=False) - gen = 
node.__iternodes__(None) - self.assertEqual((None, node), next(gen)) + gen = node.__children__() self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_tag.py b/tests/test_tag.py index 6f021ba..8ee0fc0 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text from mwparserfromhell.nodes.extras import Attribute -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) @@ -64,37 +64,30 @@ class TestTag(TreeEqualityTestCase): self.assertEqual("----", str(node8)) self.assertEqual("''italics!''", str(node9)) - def test_iternodes(self): - """test Tag.__iternodes__()""" - node1n1, node1n2 = Text("ref"), Text("foobar") - node2n1, node3n1, node3n2 = Text("bold text"), Text("img"), Text("id") - node3n3, node3n4, node3n5 = Text("foo"), Text("class"), Text("bar") - + def test_children(self): + """test Tag.__children__()""" # foobar - node1 = Tag(wrap([node1n1]), wrap([node1n2])) + node1 = Tag(wraptext("ref"), wraptext("foobar")) # '''bold text''' - node2 = Tag(wraptext("b"), wrap([node2n1]), wiki_markup="'''") + node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") # - node3 = Tag(wrap([node3n1]), - attrs=[Attribute(wrap([node3n2]), wrap([node3n3])), - Attribute(wrap([node3n4]), wrap([node3n5]))], + node3 = Tag(wraptext("img"), + attrs=[Attribute(wraptext("id"), wraptext("foo")), + Attribute(wraptext("class"), wraptext("bar"))], self_closing=True, padding=" ") - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - gen3 = node3.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) 
- self.assertEqual((None, node3), next(gen3)) - self.assertEqual((node1.tag, node1n1), next(gen1)) - self.assertEqual((node3.tag, node3n1), next(gen3)) - self.assertEqual((node3.attributes[0].name, node3n2), next(gen3)) - self.assertEqual((node3.attributes[0].value, node3n3), next(gen3)) - self.assertEqual((node3.attributes[1].name, node3n4), next(gen3)) - self.assertEqual((node3.attributes[1].value, node3n5), next(gen3)) - self.assertEqual((node1.contents, node1n2), next(gen1)) - self.assertEqual((node2.contents, node2n1), next(gen2)) - self.assertEqual((node1.closing_tag, node1n1), next(gen1)) + gen1 = node1.__children__() + gen2 = node2.__children__() + gen3 = node3.__children__() + self.assertEqual(node1.tag, next(gen1)) + self.assertEqual(node3.tag, next(gen3)) + self.assertEqual(node3.attributes[0].name, next(gen3)) + self.assertEqual(node3.attributes[0].value, next(gen3)) + self.assertEqual(node3.attributes[1].name, next(gen3)) + self.assertEqual(node3.attributes[1].value, next(gen3)) + self.assertEqual(node1.contents, next(gen1)) + self.assertEqual(node2.contents, next(gen2)) + self.assertEqual(node1.closing_tag, next(gen1)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) self.assertRaises(StopIteration, next, gen3) diff --git a/tests/test_template.py b/tests/test_template.py index d421ed7..7326433 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) @@ -42,27 +42,21 @@ class TestTemplate(TreeEqualityTestCase): [pgenh("1", "bar"), 
pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) - def test_iternodes(self): - """test Template.__iternodes__()""" - node1n1 = Text("foobar") - node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") - node2n4, node2n5 = Text("def"), Text("ghi") - node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) - node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), + def test_children(self): + """test Template.__children__()""" + node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False) + node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), showkey=True) - node1 = Template(wrap([node1n1])) - node2 = Template(wrap([node2n1]), [node2p1, node2p2]) + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [node2p1, node2p2]) - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((node1.name, node1n1), next(gen1)) - self.assertEqual((node2.name, node2n1), next(gen2)) - self.assertEqual((node2.params[0].value, node2n2), next(gen2)) - self.assertEqual((node2.params[1].name, node2n3), next(gen2)) - self.assertEqual((node2.params[1].value, node2n4), next(gen2)) - self.assertEqual((node2.params[1].value, node2n5), next(gen2)) + gen1 = node1.__children__() + gen2 = node2.__children__() + self.assertEqual(node1.name, next(gen1)) + self.assertEqual(node2.name, next(gen2)) + self.assertEqual(node2.params[0].value, next(gen2)) + self.assertEqual(node2.params[1].name, next(gen2)) + self.assertEqual(node2.params[1].value, next(gen2)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) diff --git a/tests/test_text.py b/tests/test_text.py index ad174f6..ba45aa3 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -36,11 +36,10 @@ class TestText(unittest.TestCase): node2 = Text("fóóbar") self.assertEqual("fóóbar", 
str(node2)) - def test_iternodes(self): - """test Text.__iternodes__()""" + def test_children(self): + """test Text.__children__()""" node = Text("foobar") - gen = node.__iternodes__(None) - self.assertEqual((None, node), next(gen)) + gen = node.__children__() self.assertRaises(StopIteration, next, gen) def test_strip(self): diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 1f5124f..c19d5ca 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" @@ -38,20 +38,15 @@ class TestWikilink(TreeEqualityTestCase): node2 = Wikilink(wraptext("foo"), wraptext("bar")) self.assertEqual("[[foo|bar]]", str(node2)) - def test_iternodes(self): - """test Wikilink.__iternodes__()""" - node1n1 = Text("foobar") - node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") - node1 = Wikilink(wrap([node1n1])) - node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3])) - gen1 = node1.__iternodes__(getnodes) - gen2 = node2.__iternodes__(getnodes) - self.assertEqual((None, node1), next(gen1)) - self.assertEqual((None, node2), next(gen2)) - self.assertEqual((node1.title, node1n1), next(gen1)) - self.assertEqual((node2.title, node2n1), next(gen2)) - self.assertEqual((node2.text, node2n2), next(gen2)) - self.assertEqual((node2.text, node2n3), next(gen2)) + def test_children(self): + """test Wikilink.__children__()""" + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wrap([Text("bar"), Text("baz")])) + gen1 = node1.__children__() + gen2 = node2.__children__() + self.assertEqual(node1.title, next(gen1)) + self.assertEqual(node2.title, next(gen2)) + self.assertEqual(node2.text, 
next(gen2)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) From 45aec31629f2294f96e65ce770cc649d906134a4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 29 Nov 2013 20:56:18 -0500 Subject: [PATCH 15/39] Move range/xrange compatibility code to compat.py. --- mwparserfromhell/compat.py | 2 ++ mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- tests/_test_tree_equality.py | 1 + tests/compat.py | 2 -- tests/test_parser.py | 2 +- tests/test_smart_list.py | 4 +--- tests/test_string_mixin.py | 4 +--- tests/test_wikicode.py | 2 +- 9 files changed, 9 insertions(+), 12 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index a142128..9c8ce8c 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,12 +16,14 @@ py32 = py3k and sys.version_info.minor == 2 if py3k: bytes = bytes str = str + range = xrange maxsize = sys.maxsize import html.entities as htmlentities else: bytes = str str = unicode + range = range maxsize = sys.maxint import htmlentitydefs as htmlentities diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index c9f99bf..bb0b912 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -26,7 +26,7 @@ import re from . import HTMLEntity, Node, Text from .extras import Parameter -from ..compat import str +from ..compat import range, str from ..utils import parse_anything __all__ = ["Template"] diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 8d12b62..bbc3b4c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -25,7 +25,7 @@ from math import log import re from . 
import contexts, tokens -from ..compat import htmlentities +from ..compat import htmlentities, range from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index bd1f08f..3305bc1 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -23,6 +23,7 @@ from __future__ import unicode_literals from unittest import TestCase +from mwparserfromhell.compat import range from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter diff --git a/tests/compat.py b/tests/compat.py index 8bed40e..d5b3fba 100644 --- a/tests/compat.py +++ b/tests/compat.py @@ -9,12 +9,10 @@ the main library. from mwparserfromhell.compat import py3k if py3k: - range = range from io import StringIO from urllib.parse import urlencode from urllib.request import urlopen else: - range = xrange from StringIO import StringIO from urllib import urlencode, urlopen diff --git a/tests/test_parser.py b/tests/test_parser.py index 672cbff..9975824 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,11 +24,11 @@ from __future__ import unicode_literals import unittest from mwparserfromhell import parser +from mwparserfromhell.compat import range from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext -from .compat import range class TestParser(TreeEqualityTestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 16cbac0..d566f96 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,11 +23,9 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k +from 
mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList, _ListProxy -from .compat import range - class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index ad12df7..43ae9f4 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -25,11 +25,9 @@ from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, py32, str +from mwparserfromhell.compat import bytes, py3k, py32, range, str from mwparserfromhell.string_mixin import StringMixIn -from .compat import range - class _FakeString(StringMixIn): def __init__(self, data): self._data = data diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index c974be4..c09f8a2 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -26,12 +26,12 @@ import re from types import GeneratorType import unittest +from mwparserfromhell.compat import py3k, str from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse -from mwparserfromhell.compat import py3k, str from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext From c0fb7c030a5aeb57cd6acc87dae19775cf4d0253 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Nov 2013 15:20:03 -0500 Subject: [PATCH 16/39] Implement new search protocol in Wikicode. 
--- mwparserfromhell/smart_list.py | 26 ++-- mwparserfromhell/utils.py | 15 +- mwparserfromhell/wikicode.py | 321 +++++++++++++++++++++-------------------- tests/test_argument.py | 6 +- 4 files changed, 193 insertions(+), 175 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 416c547..16d9b1a 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -79,6 +79,11 @@ class SmartList(_SliceNormalizerMixIn, list): [2, 3, 4] >>> parent [0, 1, 2, 3, 4] + + The parent needs to keep a list of its children in order to update them, + which prevents them from being garbage-collected. If you are keeping the + parent around for a while but creating many children, it is advisable to + call :py:meth:`~._ListProxy.destroy` when you're finished with them. """ def __init__(self, iterable=None): @@ -146,6 +151,11 @@ class SmartList(_SliceNormalizerMixIn, list): self.extend(other) return self + def _release_children(self): + copy = list(self) + for child in self._children: + child._parent = copy + @inheritdoc def append(self, item): head = len(self) @@ -174,17 +184,13 @@ class SmartList(_SliceNormalizerMixIn, list): @inheritdoc def reverse(self): - copy = list(self) - for child in self._children: - child._parent = copy + self._release_children() super(SmartList, self).reverse() if py3k: @inheritdoc def sort(self, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy + self._release_children() kwargs = {} if key is not None: kwargs["key"] = key @@ -194,9 +200,7 @@ class SmartList(_SliceNormalizerMixIn, list): else: @inheritdoc def sort(self, cmp=None, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy + self._release_children() kwargs = {} if cmp is not None: kwargs["cmp"] = cmp @@ -448,5 +452,9 @@ class _ListProxy(_SliceNormalizerMixIn, list): item.sort(**kwargs) self._parent[self._start:self._stop:self._step] = item + def 
destroy(self): + """Make the parent forget this child. The child will no longer work.""" + self._parent._children.pop(id(self)) + del inheritdoc diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index f07101b..4248652 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -21,8 +21,8 @@ # SOFTWARE. """ -This module contains accessory functions that wrap around existing ones to -provide additional functionality. +This module contains accessory functions for other parts of the library. Parser +users generally won't need stuff from here. """ from __future__ import unicode_literals @@ -31,7 +31,16 @@ from .compat import bytes, str from .nodes import Node from .smart_list import SmartList -__all__ = ["parse_anything"] +__all__ = ["get_children", "parse_anything"] + +def get_children(node, contexts=False, parent=None): + """Iterate over all child :py:class:`.Node`\ s of a given *node*.""" + ## DON'T MAKE THIS RECURSIVE, USE A STACK! + yield (parent, node) if contexts else node + for code in node.__children__(): + for descendant in code.nodes: + for child in get_children(descendant, contexts, code): + yield child def parse_anything(value, context=0): """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 2444cfa..8b9daff 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -21,13 +21,14 @@ # SOFTWARE. 
from __future__ import unicode_literals +from itertools import chain import re -from .compat import py3k, str +from .compat import py3k, range, str from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from .string_mixin import StringMixIn -from .utils import parse_anything +from .utils import get_children, parse_anything __all__ = ["Wikicode"] @@ -51,107 +52,86 @@ class Wikicode(StringMixIn): def __unicode__(self): return "".join([str(node) for node in self.nodes]) - def _get_children(self, node): - """Iterate over all descendants of a given *node*, including itself. - - This is implemented by the ``__iternodes__()`` generator of ``Node`` - classes, which by default yields itself and nothing more. - """ - for context, child in node.__iternodes__(self._get_all_nodes): - yield child - - def _get_all_nodes(self, code): - """Iterate over all of our descendant nodes. - - This is implemented by calling :py:meth:`_get_children` on every node - in our node list (:py:attr:`self.nodes `). - """ - for node in code.nodes: - for child in self._get_children(node): - yield child - - def _is_equivalent(self, obj, node): - """Return ``True`` if *obj* and *node* are equivalent, else ``False``. - - If *obj* is a ``Node``, the function will test whether they are the - same object, otherwise it will compare them with ``==``. - """ - return (node is obj) if isinstance(obj, Node) else (node == obj) - - def _contains(self, nodes, obj): - """Return ``True`` if *obj* is inside of *nodes*, else ``False``. - - If *obj* is a ``Node``, we will only return ``True`` if *obj* is - actually in the list (and not just a node that equals it). Otherwise, - the test is simply ``obj in nodes``. 
+ @staticmethod + def _slice_replace(code, index, old, new): + """Replace the string *old* with *new* across *index* in *code*.""" + nodes = [str(node) for node in code.get(index)] + substring = "".join(nodes).replace(old, new) + code.nodes[index] = parse_anything(substring).nodes + + def _do_strong_search(self, obj, recursive=True): + """Search for the specific element *obj* within the node list. + + *obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode` + object. If found, we return a tuple (*context*, *index*) where + *context* is the :py:class:`.Wikicode` that contains *obj* and *index* + is its index there, as a :py:class:`slice`. Note that if *recursive* is + ``False``, *context* will always be ``self`` (since we only look for + *obj* among immediate descendants), but if *recursive* is ``True``, + then it could be any :py:class:`.Wikicode` contained by a node within + ``self``. If *obj* is not found, :py:exc:`ValueError` is raised. """ + mkslice = lambda i: slice(i, i + 1) if isinstance(obj, Node): - for node in nodes: - if node is obj: - return True - return False - return obj in nodes - - def _prepare_search(self, obj): - """Prepare a new search by calculating the exact parameters. + if not recursive: + return self, mkslice(self.index(obj)) + for i, node in enumerate(self.nodes): + for context, child in get_children(node, contexts=True): + if obj is child: + if not context: + context = self + return context, mkslice(context.index(child)) + else: + context, ind = self._do_strong_search(obj.get(0), recursive) + for i in range(1, len(obj.nodes)): + if obj.get(i) is not context.get(ind.start + i): + break + else: + return context, slice(ind.start, ind.start + len(obj.nodes)) + raise ValueError(obj) - *obj*, which may be anything passable to :py:func:`.parse_anything`, is - converted to either a single :py:class:`.Node` or a - :py:class:`.Wikicode` of multiple nodes. 
*literal* is a boolean; - ``True`` if we are searching for an exact match with ``is`` or - ``False`` if we are searching for equality with ``==``. + def _do_weak_search(self, obj, recursive): + """Search for an element that looks like *obj* within the node list. + + This follows the same rules as :py:meth:`_do_strong_search` with some + differences. *obj* is treated as a string that might represent any + :py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two + present in the node list. Thus, matching is weak (using string + comparisons) rather than strong (using ``is``). Because multiple nodes + can match *obj*, the result is a list of tuples instead of just one + (however, :py:exc:`ValueError` is still raised if nothing is found). + Individual matches will never overlap. + + The tuples contain a new first element, *exact*, which is ``True`` if + we were able to match *obj* exactly to one or more adjacent nodes, or + ``False`` if we found *obj* inside a node or incompletely spanning + multiple nodes. """ - literal = isinstance(obj, (Node, Wikicode)) obj = parse_anything(obj) if not obj or obj not in self: raise ValueError(obj) - if len(obj.nodes) == 1: - obj = obj.get(0) - return obj, literal - - def _do_search(self, obj, recursive, context=None, literal=None): - """Return some info about the location of *obj* within *context*. - - If *recursive* is ``True``, we'll look within *context* (``self`` by - default) and its descendants, otherwise just *context*. We raise - :py:exc:`ValueError` if *obj* isn't found. The return data is a list of - 3-tuples (*type*, *context*, *data*) where *type* is *obj*\ 's best - type resolution (either ``Node``, ``Wikicode``, or ``str``), *context* - is the closest ``Wikicode`` encompassing it, and *data* is either a - ``Node``, a list of ``Node``\ s, or ``None`` depending on *type*. 
- """ - if not context: - context = self - obj, literal = self._prepare_search(obj) - compare = (lambda a, b: a is b) if literal else (lambda a, b: a == b) results = [] - i = 0 - while i < len(context.nodes): - node = context.get(i) - if isinstance(obj, Node) and compare(obj, node): - results.append((Node, context, node)) - elif isinstance(obj, Wikicode) and compare(obj.get(0), node): - for j in range(1, len(obj.nodes)): - if not compare(obj.get(j), context.get(i + j)): - break - else: - nodes = list(context.nodes[i:i + len(obj.nodes)]) - results.append((Wikicode, context, nodes)) - i += len(obj.nodes) - 1 - elif recursive and not isinstance(node, Text) and obj in node: - contexts = node.__iternodes__(self._get_all_nodes) - processed = [] - for code in (ctx for ctx, child in contexts): - if code and code not in processed and obj in code: - search = self._do_search(obj, recursive, code, literal) - results.extend(search) - processed.append(code) - i += 1 - - if not results and not literal and recursive: - results.append((str, context, None)) - if not results and context is self: - raise ValueError(obj) + contexts = [self] + while contexts: + context = contexts.pop() + i = len(context.nodes) - 1 + while i >= 0: + node = context.get(i) + if obj.get(-1) == node: + for j in range(-len(obj.nodes), -1): + if obj.get(j) != context.get(i + j + 1): + break + else: + i -= len(obj.nodes) - 1 + index = slice(i, i + len(obj.nodes)) + results.append((True, context, index)) + elif recursive and obj in node: + contexts.extend(node.__children__()) + i -= 1 + if not results: + if not recursive: + raise ValueError(obj) + results.append((False, self, slice(0, len(self.nodes)))) return results def _get_tree(self, code, lines, marker, indent): @@ -256,15 +236,15 @@ class Wikicode(StringMixIn): return the index of our direct descendant node within *our* list of nodes. Otherwise, the lookup is done only on direct descendants. 
""" - if recursive: - for i, node in enumerate(self.nodes): - if self._contains(self._get_children(node), obj): - return i - raise ValueError(obj) - + strict = isinstance(obj, Node) + equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n) for i, node in enumerate(self.nodes): - if self._is_equivalent(obj, node): - return i + if recursive: + for child in get_children(node): + if equivalent(obj, child): + return i + elif equivalent(obj, node): + return i raise ValueError(obj) def insert(self, index, value): @@ -279,66 +259,79 @@ class Wikicode(StringMixIn): self.nodes.insert(index, node) def insert_before(self, obj, value, recursive=True): - """Insert *value* immediately before *obj* in the list of nodes. + """Insert *value* immediately before *obj*. - *obj* can be either a string, a :py:class:`~.Node`, or other + *obj* can be either a string, a :py:class:`~.Node`, or another :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). *value* can be anything parasable by - :py:func:`.parse_anything`. If *recursive* is ``True``, we will try to - find *obj* within our child nodes even if it is not a direct descendant - of this :py:class:`~.Wikicode` object. If *obj* is not found, + for example). If *obj* is a string, we will operate on all instances + of that string within the code, otherwise only on the specific instance + given. *value* can be anything parsable by :py:func:`.parse_anything`. + If *recursive* is ``True``, we will try to find *obj* within our child + nodes even if it is not a direct descendant of this + :py:class:`~.Wikicode` object. If *obj* is not found, :py:exc:`ValueError` is raised. 
""" - for restype, context, data in self._do_search(obj, recursive): - if restype in (Node, Wikicode): - i = context.index(data if restype is Node else data[0], False) - context.insert(i, value) - else: - obj = str(obj) - context.nodes = str(context).replace(obj, str(value) + obj) + if isinstance(obj, (Node, Wikicode)): + context, index = self._do_strong_search(obj, recursive) + context.insert(index.start, value) + else: + for exact, context, index in self._do_weak_search(obj, recursive): + if exact: + context.insert(index.start, value) + else: + obj = str(obj) + self._slice_replace(context, index, obj, str(value) + obj) def insert_after(self, obj, value, recursive=True): - """Insert *value* immediately after *obj* in the list of nodes. + """Insert *value* immediately after *obj*. - *obj* can be either a string, a :py:class:`~.Node`, or other + *obj* can be either a string, a :py:class:`~.Node`, or another :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). *value* can be anything parasable by - :py:func:`.parse_anything`. If *recursive* is ``True``, we will try to - find *obj* within our child nodes even if it is not a direct descendant - of this :py:class:`~.Wikicode` object. If *obj* is not found, + for example). If *obj* is a string, we will operate on all instances + of that string within the code, otherwise only on the specific instance + given. *value* can be anything parsable by :py:func:`.parse_anything`. + If *recursive* is ``True``, we will try to find *obj* within our child + nodes even if it is not a direct descendant of this + :py:class:`~.Wikicode` object. If *obj* is not found, :py:exc:`ValueError` is raised. 
""" - for restype, context, data in self._do_search(obj, recursive): - if restype in (Node, Wikicode): - i = context.index(data if restype is Node else data[-1], False) - context.insert(i + 1, value) - else: - obj = str(obj) - context.nodes = str(context).replace(obj, obj + str(value)) + if isinstance(obj, (Node, Wikicode)): + context, index = self._do_strong_search(obj, recursive) + context.insert(index.stop, value) + else: + for exact, context, index in self._do_weak_search(obj, recursive): + if exact: + context.insert(index.stop, value) + else: + obj = str(obj) + self._slice_replace(context, index, obj, obj + str(value)) def replace(self, obj, value, recursive=True): - """Replace *obj* with *value* in the list of nodes. + """Replace *obj* with *value*. - *obj* can be either a string, a :py:class:`~.Node`, or other + *obj* can be either a string, a :py:class:`~.Node`, or another :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). *value* can be anything parasable by - :py:func:`.parse_anything`. If *recursive* is ``True``, we will try to - find *obj* within our child nodes even if it is not a direct descendant - of this :py:class:`~.Wikicode` object. If *obj* is not found, + for example). If *obj* is a string, we will operate on all instances + of that string within the code, otherwise only on the specific instance + given. *value* can be anything parsable by :py:func:`.parse_anything`. + If *recursive* is ``True``, we will try to find *obj* within our child + nodes even if it is not a direct descendant of this + :py:class:`~.Wikicode` object. If *obj* is not found, :py:exc:`ValueError` is raised. 
""" - for restype, context, data in self._do_search(obj, recursive): - if restype is Node: - i = context.index(data, False) - context.nodes.pop(i) - context.insert(i, value) - elif restype is Wikicode: - i = context.index(data[0], False) - for _ in data: - context.nodes.pop(i) - context.insert(i, value) - else: - context.nodes = str(context).replace(str(obj), str(value)) + if isinstance(obj, (Node, Wikicode)): + context, index = self._do_strong_search(obj, recursive) + for i in range(index.start, index.stop): + context.nodes.pop(index.start) + context.insert(index.start, value) + else: + for exact, context, index in self._do_weak_search(obj, recursive): + if exact: + for i in range(index.start, index.stop): + context.nodes.pop(index.start) + context.insert(index.start, value) + else: + self._slice_replace(context, index, str(obj), str(value)) def append(self, value): """Insert *value* at the end of the list of nodes. @@ -352,22 +345,26 @@ class Wikicode(StringMixIn): def remove(self, obj, recursive=True): """Remove *obj* from the list of nodes. - *obj* can be either a string, a :py:class:`~.Node`, or other + *obj* can be either a string, a :py:class:`~.Node`, or another :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, - for example). If *recursive* is ``True``, we will try to find *obj* - within our child nodes even if it is not a direct descendant of this + for example). If *obj* is a string, we will operate on all instances + of that string within the code, otherwise only on the specific instance + given. If *recursive* is ``True``, we will try to find *obj* within our + child nodes even if it is not a direct descendant of this :py:class:`~.Wikicode` object. If *obj* is not found, :py:exc:`ValueError` is raised. 
""" - for restype, context, data in self._do_search(obj, recursive): - if restype is Node: - context.nodes.pop(context.index(data, False)) - elif restype is Wikicode: - i = context.index(data[0], False) - for _ in data: - context.nodes.pop(i) - else: - context.nodes = str(context).replace(str(obj), "") + if isinstance(obj, (Node, Wikicode)): + context, index = self._do_strong_search(obj, recursive) + for i in range(index.start, index.stop): + context.nodes.pop(index.start) + else: + for exact, context, index in self._do_weak_search(obj, recursive): + if exact: + for i in range(index.start, index.stop): + context.nodes.pop(index.start) + else: + self._slice_replace(context, index, str(obj), "") def matches(self, other): """Do a loose equivalency test suitable for comparing page names. @@ -407,7 +404,11 @@ class Wikicode(StringMixIn): """ if matches and not callable(matches): pat, matches = matches, lambda obj: re.search(pat, str(obj), flags) - for node in (self._get_all_nodes(self) if recursive else self.nodes): + if recursive: + nodes = chain.from_iterable(get_children(n) for n in self.nodes) + else: + nodes = self.nodes + for node in nodes: if not forcetype or isinstance(node, forcetype): if not matches or matches(node): yield node diff --git a/tests/test_argument.py b/tests/test_argument.py index df6838d..ee6c580 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -44,9 +44,9 @@ class TestArgument(TreeEqualityTestCase): node2 = Argument(wraptext("foo"), wrap([Text("bar"), Text("baz")])) gen1 = node1.__children__() gen2 = node2.__children__() - self.assertIs(node1.name, gen1) - self.assertIs(node2.name, gen2) - self.assertIs(node2.default, gen2) + self.assertIs(node1.name, next(gen1)) + self.assertIs(node2.name, next(gen2)) + self.assertIs(node2.default, next(gen2)) self.assertRaises(StopIteration, next, gen1) self.assertRaises(StopIteration, next, gen2) From f0a591b32356470449dccd74304cdd1a791e9cf2 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: 
Sat, 30 Nov 2013 22:10:36 -0500 Subject: [PATCH 17/39] Move get_children() out of utils. --- mwparserfromhell/utils.py | 11 +---------- mwparserfromhell/wikicode.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 4248652..46abcc0 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -31,16 +31,7 @@ from .compat import bytes, str from .nodes import Node from .smart_list import SmartList -__all__ = ["get_children", "parse_anything"] - -def get_children(node, contexts=False, parent=None): - """Iterate over all child :py:class:`.Node`\ s of a given *node*.""" - ## DON'T MAKE THIS RECURSIVE, USE A STACK! - yield (parent, node) if contexts else node - for code in node.__children__(): - for descendant in code.nodes: - for child in get_children(descendant, contexts, code): - yield child +__all__ = ["parse_anything"] def parse_anything(value, context=0): """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 8b9daff..2f21a67 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -21,6 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals +from collections import deque from itertools import chain import re @@ -28,7 +29,7 @@ from .compat import py3k, range, str from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) from .string_mixin import StringMixIn -from .utils import get_children, parse_anything +from .utils import parse_anything __all__ = ["Wikicode"] @@ -53,6 +54,15 @@ class Wikicode(StringMixIn): return "".join([str(node) for node in self.nodes]) @staticmethod + def _get_children(node, contexts=False, parent=None): + """Iterate over all child :py:class:`.Node`\ s of a given *node*.""" + yield (parent, node) if contexts else node + for code in node.__children__(): + for child in code.nodes: + for result in Wikicode._get_children(child, contexts, code): + yield result + + @staticmethod def _slice_replace(code, index, old, new): """Replace the string *old* with *new* across *index* in *code*.""" nodes = [str(node) for node in code.get(index)] @@ -76,7 +86,7 @@ class Wikicode(StringMixIn): if not recursive: return self, mkslice(self.index(obj)) for i, node in enumerate(self.nodes): - for context, child in get_children(node, contexts=True): + for context, child in self._get_children(node, contexts=True): if obj is child: if not context: context = self @@ -240,7 +250,7 @@ class Wikicode(StringMixIn): equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n) for i, node in enumerate(self.nodes): if recursive: - for child in get_children(node): + for child in self._get_children(node): if equivalent(obj, child): return i elif equivalent(obj, node): @@ -405,7 +415,8 @@ class Wikicode(StringMixIn): if matches and not callable(matches): pat, matches = matches, lambda obj: re.search(pat, str(obj), flags) if recursive: - nodes = chain.from_iterable(get_children(n) for n in self.nodes) + getter = self._get_children + nodes = chain.from_iterable(getter(n) for n in self.nodes) else: nodes = self.nodes 
for node in nodes: From 3d6079d831d6c958b364b04d0341437aa9f0ac28 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Nov 2013 22:13:14 -0500 Subject: [PATCH 18/39] Mixed up py3k and py2k. --- mwparserfromhell/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 9c8ce8c..620fd15 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,14 +16,14 @@ py32 = py3k and sys.version_info.minor == 2 if py3k: bytes = bytes str = str - range = xrange + range = range maxsize = sys.maxsize import html.entities as htmlentities else: bytes = str str = unicode - range = range + range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities From 1607687c37c1b1e7c0c83a39d7803707665151ef Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Nov 2013 22:28:36 -0500 Subject: [PATCH 19/39] Remove unused import; fix indentation. --- mwparserfromhell/wikicode.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 2f21a67..90453d6 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -21,7 +21,6 @@ # SOFTWARE. 
from __future__ import unicode_literals -from collections import deque from itertools import chain import re @@ -254,7 +253,7 @@ class Wikicode(StringMixIn): if equivalent(obj, child): return i elif equivalent(obj, node): - return i + return i raise ValueError(obj) def insert(self, index, value): From df181947af8d2010b5d10bdb52b2937f107cc271 Mon Sep 17 00:00:00 2001 From: Merlijn van Deen Date: Fri, 3 Jan 2014 19:14:32 +0100 Subject: [PATCH 20/39] setup.py: Raise Exception on unsupported platforms --- setup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/setup.py b/setup.py index d545fe4..e2e51f4 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,12 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import sys + +if (sys.version_info[0] == 2 and sys.version_info[1] < 7) or \ + (sys.version_info[1] == 3 and sys.version_info[1] < 2): + raise Exception('mwparserfromhell needs Python 2.7+ or 3.2+') + from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ From e5f17eea0063006c7f48a6942e3e434bce73481f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 Jan 2014 20:31:19 -0500 Subject: [PATCH 21/39] Update copyright notices for 2014. 
--- LICENSE | 2 +- docs/conf.py | 2 +- mwparserfromhell/__init__.py | 4 ++-- mwparserfromhell/definitions.py | 2 +- mwparserfromhell/nodes/__init__.py | 2 +- mwparserfromhell/nodes/argument.py | 2 +- mwparserfromhell/nodes/comment.py | 2 +- mwparserfromhell/nodes/external_link.py | 2 +- mwparserfromhell/nodes/extras/__init__.py | 2 +- mwparserfromhell/nodes/extras/attribute.py | 2 +- mwparserfromhell/nodes/extras/parameter.py | 2 +- mwparserfromhell/nodes/heading.py | 2 +- mwparserfromhell/nodes/html_entity.py | 2 +- mwparserfromhell/nodes/tag.py | 2 +- mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/nodes/text.py | 2 +- mwparserfromhell/nodes/wikilink.py | 2 +- mwparserfromhell/parser/__init__.py | 2 +- mwparserfromhell/parser/builder.py | 2 +- mwparserfromhell/parser/contexts.py | 2 +- mwparserfromhell/parser/tokenizer.c | 2 +- mwparserfromhell/parser/tokenizer.h | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- mwparserfromhell/parser/tokens.py | 2 +- mwparserfromhell/smart_list.py | 2 +- mwparserfromhell/string_mixin.py | 2 +- mwparserfromhell/utils.py | 2 +- mwparserfromhell/wikicode.py | 2 +- setup.py | 2 +- tests/_test_tokenizer.py | 2 +- tests/_test_tree_equality.py | 2 +- tests/test_argument.py | 2 +- tests/test_attribute.py | 2 +- tests/test_builder.py | 2 +- tests/test_comment.py | 2 +- tests/test_ctokenizer.py | 2 +- tests/test_docs.py | 2 +- tests/test_external_link.py | 2 +- tests/test_heading.py | 2 +- tests/test_html_entity.py | 2 +- tests/test_parameter.py | 2 +- tests/test_parser.py | 2 +- tests/test_pytokenizer.py | 2 +- tests/test_smart_list.py | 2 +- tests/test_string_mixin.py | 2 +- tests/test_tag.py | 2 +- tests/test_template.py | 2 +- tests/test_text.py | 2 +- tests/test_tokens.py | 2 +- tests/test_utils.py | 2 +- tests/test_wikicode.py | 2 +- tests/test_wikilink.py | 2 +- 52 files changed, 53 insertions(+), 53 deletions(-) diff --git a/LICENSE b/LICENSE index 71b7129..327905b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ 
-Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2014 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/conf.py b/docs/conf.py index 9fa1e02..dd1d6e1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = u'mwparserfromhell' -copyright = u'2012, 2013 Ben Kurtovic' +copyright = u'2012, 2013, 2014 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index a9dc2ff..e7459e3 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki `_ wikicode. 
from __future__ import unicode_literals __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.4.dev" __email__ = "ben.kurtovic@gmail.com" diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index d0b7759..6020ad1 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 91afb23..223cc67 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index d28d979..a595dfb 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 4ecc173..fcfd946 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 
2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 89eab1f..d13376e 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 1895f0d..a131269 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 6266a7a..4b7c668 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index dfba277..e273af9 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 
2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index e3a0ae5..47c23a8 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 781bcfe..c75cb99 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 6869c72..661304e 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index bb0b912..d1a0b0e 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby 
granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index c87594a..55c714e 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f730697..4640f34 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 6cbfa3a..093e501 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 132f5d4..5f8ce45 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated 
documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index dcf276c..1d9adf1 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index c37d8dc..a8aae65 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2014 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index ef5acd6..41d1e0b 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -1,6 +1,6 @@ /* Tokenizer Header File for MWParserFromHell -Copyright (C) 2012-2013 Ben Kurtovic +Copyright (C) 2012-2014 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index bbc3b4c..7ed4daf 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal 
diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index 383ddbe..a152abe 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 16d9b1a..5fa9055 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 856035b..3599fe4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 46abcc0..486170d 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 90453d6..312f3a0 100644 --- a/mwparserfromhell/wikicode.py +++ 
b/mwparserfromhell/wikicode.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/setup.py b/setup.py index e2e51f4..3718e68 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index d3ba2f6..7487241 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 3305bc1..25682a9 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_argument.py b/tests/test_argument.py index ee6c580..d58137e 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated 
documentation files (the "Software"), to deal diff --git a/tests/test_attribute.py b/tests/test_attribute.py index 83f9a1e..4bb6643 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_builder.py b/tests/test_builder.py index 41eca4b..26107a9 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_comment.py b/tests/test_comment.py index bea39d8..eb87e7e 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 21b31f4..db79d8f 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_docs.py b/tests/test_docs.py index 4047c82..441132c 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# 
Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 7f5f042..0f82743 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_heading.py b/tests/test_heading.py index 2fe9ffe..59ca59e 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 60f4c38..6bf1103 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_parameter.py b/tests/test_parameter.py index a43ffe6..7345660 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_parser.py 
b/tests/test_parser.py index 9975824..59d57a2 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index b769c97..48a0efc 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d566f96..374b312 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 43ae9f4..d825d73 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_tag.py b/tests/test_tag.py index 8ee0fc0..e7e1554 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby 
granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_template.py b/tests/test_template.py index 7326433..6e1b09a 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_text.py b/tests/test_text.py index ba45aa3..930152c 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 677c973..6539675 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_utils.py b/tests/test_utils.py index 7d90813..4512027 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index c09f8a2..38133d7 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,6 +1,6 @@ # -*- 
coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index c19d5ca..fc2abf4 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2013 Ben Kurtovic +# Copyright (C) 2012-2014 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal From 9650cd6276ae83264291419f9d74f8c73d514358 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 12:41:02 +0100 Subject: [PATCH 22/39] Support `.encode()` keyword arguments for Python 2.6. --- mwparserfromhell/string_mixin.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 3599fe4..10c88e3 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -32,6 +32,15 @@ from .compat import bytes, py3k, str __all__ = ["StringMixIn"] +def inheritdoc(method): + """Set __doc__ of *method* to __doc__ of *method* in its parent class. + + Since this is used on :py:class:`~.StringMixIn`, the "parent class" used is + ``str``. This function can be used as a decorator. + """ + method.__doc__ = getattr(str, method.__name__).__doc__ + return method + class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner. 
@@ -99,8 +108,20 @@ class StringMixIn(object): def __contains__(self, item): return str(item) in self.__unicode__() + @inheritdoc + def encode(self, encoding=None, errors=None): + if encoding is None: + encoding = getdefaultencoding() + args = [encoding] + if errors is not None: + args.append(errors) + return self.__unicode__().encode(*args) + def __getattr__(self, attr): return getattr(self.__unicode__(), attr) if py3k: maketrans = str.maketrans # Static method can't rely on __getattr__ + + +del inheritdoc From ee194fb07a47f74de52c24f164a3487d0665dbb8 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 12:43:25 +0100 Subject: [PATCH 23/39] Use a generator expression instead to support Python 2.6. --- mwparserfromhell/parser/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 7ed4daf..b3ac543 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -620,7 +620,7 @@ class Tokenizer(object): self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) self._emit_all(self._pop()) - data.padding_buffer = {key: "" for key in data.padding_buffer} + data.padding_buffer = dict((key, "") for key in data.padding_buffer) def _handle_tag_space(self, data, text): """Handle whitespace (*text*) inside of an HTML open tag.""" From 3d1329aa3adf7368852db190d76e988af3cc80d9 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 12:44:37 +0100 Subject: [PATCH 24/39] Don't assume it's a named tuple to support Python 2.6. 
--- mwparserfromhell/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 620fd15..1a40d31 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -10,8 +10,8 @@ types are meant to be imported directly from within the parser's modules. import sys -py3k = sys.version_info.major == 3 -py32 = py3k and sys.version_info.minor == 2 +py3k = (sys.version_info[0] == 3) +py32 = py3k and (sys.version_info[1] == 2) if py3k: bytes = bytes From 88c8fb88e0cd788949109a2d9d1d4b12e574f1d2 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 12:46:26 +0100 Subject: [PATCH 25/39] Switch to `unittest2` to be able to use new features in Python 2.6. --- discover_tests.py | 23 +++++++++++++++++++++++ setup.py | 3 ++- tests/_test_tokenizer.py | 4 ++-- tests/_test_tree_equality.py | 2 +- tests/test_argument.py | 4 ++-- tests/test_attribute.py | 4 ++-- tests/test_builder.py | 4 ++-- tests/test_comment.py | 4 ++-- tests/test_ctokenizer.py | 8 ++++---- tests/test_docs.py | 6 +++--- tests/test_external_link.py | 4 ++-- tests/test_heading.py | 4 ++-- tests/test_html_entity.py | 4 ++-- tests/test_parameter.py | 4 ++-- tests/test_parser.py | 4 ++-- tests/test_pytokenizer.py | 6 +++--- tests/test_smart_list.py | 6 +++--- tests/test_string_mixin.py | 6 +++--- tests/test_tag.py | 4 ++-- tests/test_template.py | 4 ++-- tests/test_text.py | 6 +++--- tests/test_tokens.py | 6 +++--- tests/test_utils.py | 4 ++-- tests/test_wikicode.py | 4 ++-- tests/test_wikilink.py | 4 ++-- 25 files changed, 78 insertions(+), 54 deletions(-) create mode 100644 discover_tests.py diff --git a/discover_tests.py b/discover_tests.py new file mode 100644 index 0000000..5065aba --- /dev/null +++ b/discover_tests.py @@ -0,0 +1,23 @@ +# -*- coding: UTF-8 -*- + + +""" +Discover tests using ``unittest2`. 
+ +It appears the default distutils test suite doesn't play nice with +``setUpClass`` thereby making some tests fail. Using ``unittest2`` +to load tests seems to work around that issue. + +http://stackoverflow.com/a/17004409/753501 +""" + + +# Standard: +import os + +# External: +import unittest2 + + +def additional_tests(): + return unittest2.defaultTestLoader.discover(os.path.dirname(__file__)) diff --git a/setup.py b/setup.py index 3718e68..1dcb7b2 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,8 @@ setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), ext_modules = [tokenizer], - test_suite = "tests", + tests_require = ['unittest2'], + test_suite = "discover_tests", version = __version__, author = "Ben Kurtovic", author_email = "ben.kurtovic@gmail.com", diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 7487241..7f6bb52 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -35,7 +35,7 @@ class _TestParseError(Exception): class TokenizerTestCase(object): """A base test case for tokenizers, whose tests are loaded dynamically. - Subclassed along with unittest.TestCase to form TestPyTokenizer and + Subclassed along with unittest2.TestCase to form TestPyTokenizer and TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' directory. """ @@ -121,7 +121,7 @@ class TokenizerTestCase(object): if len(sys.argv) > 2 and sys.argv[1] == "--use": for name in sys.argv[2:]: load_file(path.join(directory, name + extension)) - sys.argv = [sys.argv[0]] # So unittest doesn't try to load these + sys.argv = [sys.argv[0]] # So unittest2 doesn't try to load these cls.skip_others = True else: for filename in listdir(directory): diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 25682a9..99003ac 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -21,7 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals -from unittest import TestCase +from unittest2 import TestCase from mwparserfromhell.compat import range from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, diff --git a/tests/test_argument.py b/tests/test_argument.py index d58137e..48eca1a 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text @@ -99,4 +99,4 @@ class TestArgument(TreeEqualityTestCase): self.assertIs(None, node2.default) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_attribute.py b/tests/test_attribute.py index 4bb6643..4d22338 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Template @@ -86,4 +86,4 @@ class TestAttribute(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node, pad, True) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_builder.py b/tests/test_builder.py index 26107a9..6a95f66 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -21,7 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) @@ -417,4 +417,4 @@ class TestBuilder(TreeEqualityTestCase): self.assertWikicodeEqual(valid, self.builder.build(test)) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_comment.py b/tests/test_comment.py index eb87e7e..784a4b3 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Comment @@ -64,4 +64,4 @@ class TestComment(TreeEqualityTestCase): self.assertEqual("barfoo", node.contents) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index db79d8f..c20ef3a 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -21,7 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 try: from mwparserfromhell.parser._tokenizer import CTokenizer @@ -30,8 +30,8 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase -@unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase, unittest.TestCase): +@unittest2.skipUnless(CTokenizer, "C tokenizer not available") +class TestCTokenizer(TokenizerTestCase, unittest2.TestCase): """Test cases for the C tokenizer.""" @classmethod @@ -45,4 +45,4 @@ class TestCTokenizer(TokenizerTestCase, unittest.TestCase): self.assertTrue(CTokenizer().USES_C) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_docs.py b/tests/test_docs.py index 441132c..1a48cbc 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -22,14 +22,14 @@ from __future__ import print_function, unicode_literals import json -import unittest +import unittest2 import mwparserfromhell from mwparserfromhell.compat import py3k, str from .compat import StringIO, urlencode, urlopen -class TestDocs(unittest.TestCase): +class TestDocs(unittest2.TestCase): """Integration test cases for mwparserfromhell's documentation.""" def assertPrint(self, input, output): @@ -128,4 +128,4 @@ class TestDocs(unittest.TestCase): self.assertEqual(expected, actual) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 0f82743..4fc0561 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -21,7 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text @@ -122,4 +122,4 @@ class TestExternalLink(TreeEqualityTestCase): self.assertEqual("http://example.com/", str(node2)) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_heading.py b/tests/test_heading.py index 59ca59e..fa69978 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text @@ -85,4 +85,4 @@ class TestHeading(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node, "level", False) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 6bf1103..de4c274 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity @@ -165,4 +165,4 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("\U0001F648", node4.normalize()) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 7345660..0cc1b0f 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -21,7 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text @@ -72,4 +72,4 @@ class TestParameter(TreeEqualityTestCase): self.assertFalse(node2.showkey) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py index 59d57a2..071e2c7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell import parser from mwparserfromhell.compat import range @@ -86,4 +86,4 @@ class TestParser(TreeEqualityTestCase): parser.use_c = restore if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 48a0efc..147738d 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -21,13 +21,13 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): +class TestPyTokenizer(TokenizerTestCase, unittest2.TestCase): """Test cases for the Python tokenizer.""" @classmethod @@ -41,4 +41,4 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): self.assertFalse(Tokenizer().USES_C) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 374b312..5b96725 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -21,12 +21,12 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList, _ListProxy -class TestSmartList(unittest.TestCase): +class TestSmartList(unittest2.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" def _test_get_set_del_item(self, builder): @@ -387,4 +387,4 @@ class TestSmartList(unittest.TestCase): self.assertEqual([4, 3, 2, 1.9, 1.8], child2) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index d825d73..bcf1ae0 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals from sys import getdefaultencoding from types import GeneratorType -import unittest +import unittest2 from mwparserfromhell.compat import bytes, py3k, py32, range, str from mwparserfromhell.string_mixin import StringMixIn @@ -36,7 +36,7 @@ class _FakeString(StringMixIn): return self._data -class TestStringMixIn(unittest.TestCase): +class TestStringMixIn(unittest2.TestCase): """Test cases for the StringMixIn class.""" def test_docs(self): @@ -432,4 +432,4 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("000123", str12.zfill(6)) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_tag.py b/tests/test_tag.py index e7e1554..f9d0e87 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text @@ -305,4 +305,4 @@ class TestTag(TreeEqualityTestCase): self.assertEqual('
', node) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py index 6e1b09a..1d3a547 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text @@ -428,4 +428,4 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_text.py b/tests/test_text.py index 930152c..c9f89cc 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -21,12 +21,12 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text -class TestText(unittest.TestCase): +class TestText(unittest2.TestCase): """Test cases for the Text node.""" def test_unicode(self): @@ -71,4 +71,4 @@ class TestText(unittest.TestCase): self.assertIsInstance(node.value, str) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 6539675..cc2f366 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -21,12 +21,12 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens -class TestTokens(unittest.TestCase): +class TestTokens(unittest2.TestCase): """Test cases for the Token class and its subclasses.""" def test_issubclass(self): @@ -105,4 +105,4 @@ class TestTokens(unittest.TestCase): self.assertEqual(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4512027..f62c1a2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,7 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.nodes import Template, Text from mwparserfromhell.utils import parse_anything @@ -59,4 +59,4 @@ class TestUtils(TreeEqualityTestCase): self.assertRaises(ValueError, parse_anything, ["foo", [object]]) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 38133d7..d7119ed 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -24,7 +24,7 @@ from __future__ import unicode_literals from functools import partial import re from types import GeneratorType -import unittest +import unittest2 from mwparserfromhell.compat import py3k, str from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, @@ -432,4 +432,4 @@ class TestWikicode(TreeEqualityTestCase): self.assertEqual(expected.expandtabs(4), code.get_tree()) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index fc2abf4..425f1ec 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -21,7 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest +import unittest2 from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink @@ -99,4 +99,4 @@ class TestWikilink(TreeEqualityTestCase): self.assertIs(None, node2.text) if __name__ == "__main__": - unittest.main(verbosity=2) + unittest2.main(verbosity=2) From 21830039978ca1c8988b837a5bc3510ba03ba072 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 12:56:55 +0100 Subject: [PATCH 26/39] Python 2.6 is now supported. --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 1dcb7b2..066143c 100644 --- a/setup.py +++ b/setup.py @@ -23,9 +23,9 @@ import sys -if (sys.version_info[0] == 2 and sys.version_info[1] < 7) or \ +if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ (sys.version_info[1] == 3 and sys.version_info[1] < 2): - raise Exception('mwparserfromhell needs Python 2.7+ or 3.2+') + raise Exception('mwparserfromhell needs Python 2.6+ or 3.2+') from setuptools import setup, find_packages, Extension @@ -59,6 +59,7 @@ setup( "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", + "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.2", From e632d0dd44a727ab203e998768172931d34767be Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 13:11:59 +0100 Subject: [PATCH 27/39] Update Travis CI configuration to also test with Python 2.6. 
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 347badd..31090f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ language: python python: + - "2.6" - "2.7" - "3.2" - "3.3" From cc026712068d0c9cc2e1dffbba0cc6453118a285 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 13:12:29 +0100 Subject: [PATCH 28/39] Update changelog to mention support for Python 2.6. --- CHANGELOG | 1 + docs/changelog.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 558e5cb..84dc148 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ v0.4 (unreleased): +- Added support for Python 2.6. - Template.has() is now passed 'ignore_empty=False' by default instead of True. This fixes a bug when adding parameters to templates with empty fields, and is a breaking change if you rely on the default behavior. diff --git a/docs/changelog.rst b/docs/changelog.rst index 07b02da..ada6e1e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,7 @@ v0.4 Unreleased (`changes `__): +- Added support for Python 2.6. - :py:meth:`.Template.has` is now passed *ignore_empty=False* by default instead of *True*. This fixes a bug when adding parameters to templates with empty fields, **and is a breaking change if you rely on the default From 19e6c186f3354abd6592f805a3bbc42018372a0a Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 13:37:50 +0100 Subject: [PATCH 29/39] Be explicit with the import. 
--- discover_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discover_tests.py b/discover_tests.py index 5065aba..3e8dbe9 100644 --- a/discover_tests.py +++ b/discover_tests.py @@ -13,7 +13,7 @@ http://stackoverflow.com/a/17004409/753501 # Standard: -import os +import os.path # External: import unittest2 From a65357a535e97f907a288a55c99ed06abed9701c Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 27 Feb 2014 13:44:32 +0100 Subject: [PATCH 30/39] Correct dependency for Python 3. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 066143c..1e126fb 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), ext_modules = [tokenizer], - tests_require = ['unittest2'], + tests_require = ['unittest2py3k' if py3k else 'unittest2'], test_suite = "discover_tests", version = __version__, author = "Ben Kurtovic", From 1312a1fb8a7a5505c480038698b9ff6f10271c5d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Mar 2014 23:49:17 -0500 Subject: [PATCH 31/39] Some clean up for Python 2.6 support. * Removed unittest2 dependency on Python >2.6. * Moved discover_tests.py into tests/. * tokenizer.c: Fixed errors noted by -Wshorten-64-to-32. 
--- discover_tests.py | 23 ----------------------- mwparserfromhell/compat.py | 1 + mwparserfromhell/parser/tokenizer.c | 9 +++++---- mwparserfromhell/parser/tokenizer.py | 3 ++- mwparserfromhell/string_mixin.py | 20 ++++++++++---------- setup.py | 6 +++--- tests/_test_tokenizer.py | 4 ++-- tests/_test_tree_equality.py | 6 +++++- tests/discover.py | 24 ++++++++++++++++++++++++ tests/test_argument.py | 8 ++++++-- tests/test_attribute.py | 8 ++++++-- tests/test_builder.py | 8 ++++++-- tests/test_comment.py | 8 ++++++-- tests/test_ctokenizer.py | 12 ++++++++---- tests/test_docs.py | 10 +++++++--- tests/test_external_link.py | 8 ++++++-- tests/test_heading.py | 8 ++++++-- tests/test_html_entity.py | 8 ++++++-- tests/test_parameter.py | 8 ++++++-- tests/test_parser.py | 8 ++++++-- tests/test_pytokenizer.py | 10 +++++++--- tests/test_smart_list.py | 10 +++++++--- tests/test_string_mixin.py | 10 +++++++--- tests/test_tag.py | 8 ++++++-- tests/test_template.py | 8 ++++++-- tests/test_text.py | 10 +++++++--- tests/test_tokens.py | 10 +++++++--- tests/test_utils.py | 8 ++++++-- tests/test_wikicode.py | 8 ++++++-- tests/test_wikilink.py | 8 ++++++-- 30 files changed, 186 insertions(+), 94 deletions(-) delete mode 100644 discover_tests.py create mode 100644 tests/discover.py diff --git a/discover_tests.py b/discover_tests.py deleted file mode 100644 index 3e8dbe9..0000000 --- a/discover_tests.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: UTF-8 -*- - - -""" -Discover tests using ``unittest2`. - -It appears the default distutils test suite doesn't play nice with -``setUpClass`` thereby making some tests fail. Using ``unittest2`` -to load tests seems to work around that issue. 
- -http://stackoverflow.com/a/17004409/753501 -""" - - -# Standard: -import os.path - -# External: -import unittest2 - - -def additional_tests(): - return unittest2.defaultTestLoader.discover(os.path.dirname(__file__)) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 1a40d31..4384ace 100644 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -10,6 +10,7 @@ types are meant to be imported directly from within the parser's modules. import sys +py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6) py3k = (sys.version_info[0] == 3) py32 = py3k and (sys.version_info[1] == 2) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index a8aae65..88f6490 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -440,7 +440,7 @@ static int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse) { Textbuffer *original = buffer; - int i; + long i; if (reverse) { do { @@ -940,7 +940,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Textbuffer *scheme_buffer = Textbuffer_new(), *temp_buffer; PyObject *scheme; Py_UNICODE chunk; - int slashes, i, j; + long i; + int slashes, j; if (!scheme_buffer) return -1; @@ -1296,8 +1297,8 @@ static int Tokenizer_parse_heading(Tokenizer* self) */ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) { - Py_ssize_t reset = self->head, best; - int i, current, level, diff; + Py_ssize_t reset = self->head; + int best, i, current, level, diff; HeadingData *after, *heading; PyObject *stack; diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index b3ac543..269cee2 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -620,7 +620,8 @@ class Tokenizer(object): self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) self._emit_all(self._pop()) 
- data.padding_buffer = dict((key, "") for key in data.padding_buffer) + for key in data.padding_buffer: + data.padding_buffer[key] = "" def _handle_tag_space(self, data, text): """Handle whitespace (*text*) inside of an HTML open tag.""" diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 10c88e3..fe41d6d 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -28,7 +28,7 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. from __future__ import unicode_literals from sys import getdefaultencoding -from .compat import bytes, py3k, str +from .compat import bytes, py26, py3k, str __all__ = ["StringMixIn"] @@ -108,20 +108,20 @@ class StringMixIn(object): def __contains__(self, item): return str(item) in self.__unicode__() - @inheritdoc - def encode(self, encoding=None, errors=None): - if encoding is None: - encoding = getdefaultencoding() - args = [encoding] - if errors is not None: - args.append(errors) - return self.__unicode__().encode(*args) - def __getattr__(self, attr): return getattr(self.__unicode__(), attr) if py3k: maketrans = str.maketrans # Static method can't rely on __getattr__ + if py26: + @inheritdoc + def encode(self, encoding=None, errors=None): + if encoding is None: + encoding = getdefaultencoding() + if errors is not None: + return self.__unicode__().encode(encoding, errors) + return self.__unicode__().encode(encoding) + del inheritdoc diff --git a/setup.py b/setup.py index 1e126fb..eaccdb2 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ -from mwparserfromhell.compat import py3k +from mwparserfromhell.compat import py26, py3k with open("README.rst") as fp: long_docs = fp.read() @@ -42,8 +42,8 @@ setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), ext_modules = 
[tokenizer], - tests_require = ['unittest2py3k' if py3k else 'unittest2'], - test_suite = "discover_tests", + tests_require = ["unittest2"] if py26 else [], + test_suite = "tests.discover", version = __version__, author = "Ben Kurtovic", author_email = "ben.kurtovic@gmail.com", diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 7f6bb52..7487241 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -35,7 +35,7 @@ class _TestParseError(Exception): class TokenizerTestCase(object): """A base test case for tokenizers, whose tests are loaded dynamically. - Subclassed along with unittest2.TestCase to form TestPyTokenizer and + Subclassed along with unittest.TestCase to form TestPyTokenizer and TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' directory. """ @@ -121,7 +121,7 @@ class TokenizerTestCase(object): if len(sys.argv) > 2 and sys.argv[1] == "--use": for name in sys.argv[2:]: load_file(path.join(directory, name + extension)) - sys.argv = [sys.argv[0]] # So unittest2 doesn't try to load these + sys.argv = [sys.argv[0]] # So unittest doesn't try to load these cls.skip_others = True else: for filename in listdir(directory): diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 99003ac..10d491e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -from unittest2 import TestCase + +try: + from unittest2 import TestCase +except ImportError: + from unittest import TestCase from mwparserfromhell.compat import range from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, diff --git a/tests/discover.py b/tests/discover.py new file mode 100644 index 0000000..6bb971b --- /dev/null +++ b/tests/discover.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +""" +Discover tests using ``unittest2`` for Python 2.6.
+ +It appears the default distutils test suite doesn't play nice with +``setUpClass`` thereby making some tests fail. Using ``unittest2`` to load +tests seems to work around that issue. + +http://stackoverflow.com/a/17004409/753501 +""" + +import os.path + +from mwparserfromhell.compat import py26 + +if py26: + import unittest2 as unittest +else: + import unittest + +def additional_tests(): + project_root = os.path.split(os.path.dirname(__file__))[0] + return unittest.defaultTestLoader.discover(project_root) diff --git a/tests/test_argument.py b/tests/test_argument.py index 48eca1a..3539ec4 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text @@ -99,4 +103,4 @@ class TestArgument(TreeEqualityTestCase): self.assertIs(None, node2.default) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_attribute.py b/tests/test_attribute.py index 4d22338..50eed74 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Template @@ -86,4 +90,4 @@ class TestAttribute(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node, pad, True) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_builder.py b/tests/test_builder.py index 6a95f66..c8fdca3 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -21,7 +21,11 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) @@ -417,4 +421,4 @@ class TestBuilder(TreeEqualityTestCase): self.assertWikicodeEqual(valid, self.builder.build(test)) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_comment.py b/tests/test_comment.py index 784a4b3..cac8719 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Comment @@ -64,4 +68,4 @@ class TestComment(TreeEqualityTestCase): self.assertEqual("barfoo", node.contents) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index c20ef3a..52427e3 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -21,7 +21,11 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest try: from mwparserfromhell.parser._tokenizer import CTokenizer @@ -30,8 +34,8 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase -@unittest2.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase, unittest2.TestCase): +@unittest.skipUnless(CTokenizer, "C tokenizer not available") +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" @classmethod @@ -45,4 +49,4 @@ class TestCTokenizer(TokenizerTestCase, unittest2.TestCase): self.assertTrue(CTokenizer().USES_C) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_docs.py b/tests/test_docs.py index 1a48cbc..c873f0e 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -22,14 +22,18 @@ from __future__ import print_function, unicode_literals import json -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest import mwparserfromhell from mwparserfromhell.compat import py3k, str from .compat import StringIO, urlencode, urlopen -class TestDocs(unittest2.TestCase): +class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" def assertPrint(self, input, output): @@ -128,4 +132,4 @@ class TestDocs(unittest2.TestCase): self.assertEqual(expected, actual) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 4fc0561..c81470e 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -21,7 +21,11 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import ExternalLink, Text @@ -122,4 +126,4 @@ class TestExternalLink(TreeEqualityTestCase): self.assertEqual("http://example.com/", str(node2)) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_heading.py b/tests/test_heading.py index fa69978..7c7a7ee 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text @@ -85,4 +89,4 @@ class TestHeading(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node, "level", False) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index de4c274..eb6f606 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity @@ -165,4 +169,4 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("\U0001F648", node4.normalize()) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 0cc1b0f..ee52b59 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -21,7 +21,11 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text @@ -72,4 +76,4 @@ class TestParameter(TreeEqualityTestCase): self.assertFalse(node2.showkey) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py index 071e2c7..955f455 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell import parser from mwparserfromhell.compat import range @@ -86,4 +90,4 @@ class TestParser(TreeEqualityTestCase): parser.use_c = restore if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 147738d..40e2caf 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -21,13 +21,17 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase, unittest2.TestCase): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" @classmethod @@ -41,4 +45,4 @@ class TestPyTokenizer(TokenizerTestCase, unittest2.TestCase): self.assertFalse(Tokenizer().USES_C) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 5b96725..b739d62 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -21,12 +21,16 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList, _ListProxy -class TestSmartList(unittest2.TestCase): +class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" def _test_get_set_del_item(self, builder): @@ -387,4 +391,4 @@ class TestSmartList(unittest2.TestCase): self.assertEqual([4, 3, 2, 1.9, 1.8], child2) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index bcf1ae0..bc44f55 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -23,7 +23,11 @@ from __future__ import unicode_literals from sys import getdefaultencoding from types import GeneratorType -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import bytes, py3k, py32, range, str from mwparserfromhell.string_mixin import StringMixIn @@ -36,7 +40,7 @@ class _FakeString(StringMixIn): return self._data -class TestStringMixIn(unittest2.TestCase): +class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" def test_docs(self): @@ -432,4 +436,4 @@ class TestStringMixIn(unittest2.TestCase): self.assertEqual("000123", str12.zfill(6)) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_tag.py b/tests/test_tag.py index f9d0e87..111511a 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -21,7 +21,11 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Tag, Template, Text @@ -305,4 +309,4 @@ class TestTag(TreeEqualityTestCase): self.assertEqual('
', node) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py index 1d3a547..584b02f 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text @@ -428,4 +432,4 @@ class TestTemplate(TreeEqualityTestCase): self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_text.py b/tests/test_text.py index c9f89cc..ee2e5c7 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -21,12 +21,16 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text -class TestText(unittest2.TestCase): +class TestText(unittest.TestCase): """Test cases for the Text node.""" def test_unicode(self): @@ -71,4 +75,4 @@ class TestText(unittest2.TestCase): self.assertIsInstance(node.value, str) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py index cc2f366..3efce86 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -21,12 +21,16 @@ # SOFTWARE. 
from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens -class TestTokens(unittest2.TestCase): +class TestTokens(unittest.TestCase): """Test cases for the Token class and its subclasses.""" def test_issubclass(self): @@ -105,4 +109,4 @@ class TestTokens(unittest2.TestCase): self.assertEqual(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_utils.py b/tests/test_utils.py index f62c1a2..ddcc078 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.nodes import Template, Text from mwparserfromhell.utils import parse_anything @@ -59,4 +63,4 @@ class TestUtils(TreeEqualityTestCase): self.assertRaises(ValueError, parse_anything, ["foo", [object]]) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index d7119ed..9ff5949 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -24,7 +24,11 @@ from __future__ import unicode_literals from functools import partial import re from types import GeneratorType -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import py3k, str from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, @@ -432,4 +436,4 @@ class TestWikicode(TreeEqualityTestCase): self.assertEqual(expected.expandtabs(4), code.get_tree()) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 425f1ec..1bdc907 100644 --- 
a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -21,7 +21,11 @@ # SOFTWARE. from __future__ import unicode_literals -import unittest2 + +try: + import unittest2 as unittest +except ImportError: + import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink @@ -99,4 +103,4 @@ class TestWikilink(TreeEqualityTestCase): self.assertIs(None, node2.text) if __name__ == "__main__": - unittest2.main(verbosity=2) + unittest.main(verbosity=2) From 5c5fd6b3cbfecc822ed15256c9d7e4e478417bb0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 9 Mar 2014 16:05:15 -0400 Subject: [PATCH 32/39] Fix a bug involving nested links (closes #61 and #62). --- CHANGELOG | 1 + docs/changelog.rst | 1 + mwparserfromhell/parser/contexts.py | 58 +++++++++++++++--------------- mwparserfromhell/parser/tokenizer.c | 10 +++--- mwparserfromhell/parser/tokenizer.h | 68 ++++++++++++++++++------------------ mwparserfromhell/parser/tokenizer.py | 10 +++--- tests/tokenizer/integration.mwtest | 28 +++++++++++++++ tests/tokenizer/wikilinks.mwtest | 32 ++++++++--------- 8 files changed, 117 insertions(+), 91 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 84dc148..2c94ebc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ v0.4 (unreleased): - Given the frequency of issues with the (admittedly insufficient) tag parser, there's a temporary skip_style_tags argument to parse() that ignores '' and ''' until these issues are corrected. +- Fixed a parser bug involving nested wikilinks and external links. - C code cleanup and speed improvements. v0.3.2 (released September 1, 2013): diff --git a/docs/changelog.rst b/docs/changelog.rst index ada6e1e..6e1ce47 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -24,6 +24,7 @@ Unreleased there's a temporary *skip_style_tags* argument to :py:meth:`~mwparserfromhell.parse` that ignores ``''`` and ``'''`` until these issues are corrected. 
+- Fixed a parser bug involving nested wikilinks and external links. - C code cleanup and speed improvements. v0.3.2 diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 1d9adf1..28023b5 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -55,7 +55,6 @@ Local (stack-specific) contexts: * :py:const:`EXT_LINK_URI` * :py:const:`EXT_LINK_TITLE` - * :py:const:`EXT_LINK_BRACKETS` * :py:const:`HEADING` @@ -100,7 +99,8 @@ Aggregate contexts: * :py:const:`FAIL` * :py:const:`UNSAFE` * :py:const:`DOUBLE` -* :py:const:`INVALID_LINK` +* :py:const:`NO_WIKILINKS` +* :py:const:`NO_EXT_LINKS` """ @@ -121,38 +121,37 @@ WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT EXT_LINK_URI = 1 << 7 EXT_LINK_TITLE = 1 << 8 -EXT_LINK_BRACKETS = 1 << 9 -EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS - -HEADING_LEVEL_1 = 1 << 10 -HEADING_LEVEL_2 = 1 << 11 -HEADING_LEVEL_3 = 1 << 12 -HEADING_LEVEL_4 = 1 << 13 -HEADING_LEVEL_5 = 1 << 14 -HEADING_LEVEL_6 = 1 << 15 +EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + +HEADING_LEVEL_1 = 1 << 9 +HEADING_LEVEL_2 = 1 << 10 +HEADING_LEVEL_3 = 1 << 11 +HEADING_LEVEL_4 = 1 << 12 +HEADING_LEVEL_5 = 1 << 13 +HEADING_LEVEL_6 = 1 << 14 HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6) -TAG_OPEN = 1 << 16 -TAG_ATTR = 1 << 17 -TAG_BODY = 1 << 18 -TAG_CLOSE = 1 << 19 +TAG_OPEN = 1 << 15 +TAG_ATTR = 1 << 16 +TAG_BODY = 1 << 17 +TAG_CLOSE = 1 << 18 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE -STYLE_ITALICS = 1 << 20 -STYLE_BOLD = 1 << 21 -STYLE_PASS_AGAIN = 1 << 22 -STYLE_SECOND_PASS = 1 << 23 +STYLE_ITALICS = 1 << 19 +STYLE_BOLD = 1 << 20 +STYLE_PASS_AGAIN = 1 << 21 +STYLE_SECOND_PASS = 1 << 22 STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS -DL_TERM = 1 << 24 +DL_TERM = 1 << 23 -HAS_TEXT = 1 << 25 -FAIL_ON_TEXT = 1 << 26 -FAIL_NEXT = 1 << 27 -FAIL_ON_LBRACE = 1 << 28 -FAIL_ON_RBRACE = 1 
<< 29 -FAIL_ON_EQUALS = 1 << 30 +HAS_TEXT = 1 << 24 +FAIL_ON_TEXT = 1 << 25 +FAIL_NEXT = 1 << 26 +FAIL_ON_LBRACE = 1 << 27 +FAIL_ON_RBRACE = 1 << 28 +FAIL_ON_EQUALS = 1 << 29 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + FAIL_ON_RBRACE + FAIL_ON_EQUALS) @@ -163,7 +162,8 @@ GL_HEADING = 1 << 0 # Aggregate contexts: FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE -UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY + - ARGUMENT_NAME + TAG_CLOSE) +UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + + TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE -INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK +NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI +NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 88f6490..de58e72 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1158,7 +1158,7 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link) */ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) { - #define INVALID_CONTEXT self->topstack->context & AGG_INVALID_LINK + #define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS #define NOT_A_LINK \ if (!brackets && self->topstack->context & LC_DLTERM) \ return Tokenizer_handle_dl_term(self); \ @@ -2440,10 +2440,8 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) { if (context & LC_FAIL_NEXT) return -1; - if (context & LC_WIKILINK) { - if (context & LC_WIKILINK_TEXT) - return (data == '[' && Tokenizer_READ(self, 1) == '[') ? 
-1 : 0; - else if (data == ']' || data == '{') + if (context & LC_WIKILINK_TITLE) { + if (data == ']' || data == '{') self->topstack->context |= LC_FAIL_NEXT; else if (data == '\n' || data == '[' || data == '}') return -1; @@ -2577,7 +2575,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) return NULL; } else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) { - if (!(this_context & AGG_INVALID_LINK)) { + if (!(this_context & AGG_NO_WIKILINKS)) { if (Tokenizer_parse_wikilink(self)) return NULL; } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 41d1e0b..032480d 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -121,40 +121,39 @@ static PyObject* TagCloseClose; #define LC_WIKILINK_TITLE 0x00000020 #define LC_WIKILINK_TEXT 0x00000040 -#define LC_EXT_LINK 0x00000380 +#define LC_EXT_LINK 0x00000180 #define LC_EXT_LINK_URI 0x00000080 #define LC_EXT_LINK_TITLE 0x00000100 -#define LC_EXT_LINK_BRACKETS 0x00000200 - -#define LC_HEADING 0x0000FC00 -#define LC_HEADING_LEVEL_1 0x00000400 -#define LC_HEADING_LEVEL_2 0x00000800 -#define LC_HEADING_LEVEL_3 0x00001000 -#define LC_HEADING_LEVEL_4 0x00002000 -#define LC_HEADING_LEVEL_5 0x00004000 -#define LC_HEADING_LEVEL_6 0x00008000 - -#define LC_TAG 0x000F0000 -#define LC_TAG_OPEN 0x00010000 -#define LC_TAG_ATTR 0x00020000 -#define LC_TAG_BODY 0x00040000 -#define LC_TAG_CLOSE 0x00080000 - -#define LC_STYLE 0x00F00000 -#define LC_STYLE_ITALICS 0x00100000 -#define LC_STYLE_BOLD 0x00200000 -#define LC_STYLE_PASS_AGAIN 0x00400000 -#define LC_STYLE_SECOND_PASS 0x00800000 - -#define LC_DLTERM 0x01000000 - -#define LC_SAFETY_CHECK 0x7E000000 -#define LC_HAS_TEXT 0x02000000 -#define LC_FAIL_ON_TEXT 0x04000000 -#define LC_FAIL_NEXT 0x08000000 -#define LC_FAIL_ON_LBRACE 0x10000000 -#define LC_FAIL_ON_RBRACE 0x20000000 -#define LC_FAIL_ON_EQUALS 0x40000000 + +#define LC_HEADING 0x00007E00 +#define 
LC_HEADING_LEVEL_1 0x00000200 +#define LC_HEADING_LEVEL_2 0x00000400 +#define LC_HEADING_LEVEL_3 0x00000800 +#define LC_HEADING_LEVEL_4 0x00001000 +#define LC_HEADING_LEVEL_5 0x00002000 +#define LC_HEADING_LEVEL_6 0x00004000 + +#define LC_TAG 0x00078000 +#define LC_TAG_OPEN 0x00008000 +#define LC_TAG_ATTR 0x00010000 +#define LC_TAG_BODY 0x00020000 +#define LC_TAG_CLOSE 0x00040000 + +#define LC_STYLE 0x00780000 +#define LC_STYLE_ITALICS 0x00080000 +#define LC_STYLE_BOLD 0x00100000 +#define LC_STYLE_PASS_AGAIN 0x00200000 +#define LC_STYLE_SECOND_PASS 0x00400000 + +#define LC_DLTERM 0x00800000 + +#define LC_SAFETY_CHECK 0x3F000000 +#define LC_HAS_TEXT 0x01000000 +#define LC_FAIL_ON_TEXT 0x02000000 +#define LC_FAIL_NEXT 0x04000000 +#define LC_FAIL_ON_LBRACE 0x08000000 +#define LC_FAIL_ON_RBRACE 0x10000000 +#define LC_FAIL_ON_EQUALS 0x20000000 /* Global contexts: */ @@ -163,9 +162,10 @@ static PyObject* TagCloseClose; /* Aggregate contexts: */ #define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) -#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) +#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) -#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK) +#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) +#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) /* Tag contexts: */ diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 269cee2..29a7e25 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -467,7 +467,7 @@ class Tokenizer(object): reset = self._head self._head += 1 try: - bad_context = self._context & 
contexts.INVALID_LINK + bad_context = self._context & contexts.NO_EXT_LINKS if bad_context or not self._can_recurse(): raise BadRoute() link, extra, delta = self._really_parse_external_link(brackets) @@ -990,10 +990,8 @@ class Tokenizer(object): context = self._context if context & contexts.FAIL_NEXT: return False - if context & contexts.WIKILINK: - if context & contexts.WIKILINK_TEXT: - return not (this == self._read(1) == "[") - elif this == "]" or this == "{": + if context & contexts.WIKILINK_TITLE: + if this == "]" or this == "{": self._context |= contexts.FAIL_NEXT elif this == "\n" or this == "[" or this == "}": return False @@ -1083,7 +1081,7 @@ class Tokenizer(object): else: self._emit_text("}") elif this == next == "[" and self._can_recurse(): - if not self._context & contexts.INVALID_LINK: + if not self._context & contexts.NO_WIKILINKS: self._parse_wikilink() else: self._emit_text("[") diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 37ef9f1..bf19f4d 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -150,3 +150,31 @@ name: comment_inside_bracketed_link label: an HTML comment inside a bracketed external link input: "[http://example.com/foobar]" output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()] + +--- + +name: wikilink_inside_external_link +label: a wikilink inside an external link, which the parser considers valid (see issue #61) +input: "[http://example.com/foo Foo [[Bar]]]" +output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), ExternalLinkSeparator(), Text(text="Foo "), WikilinkOpen(), Text(text="Bar"), WikilinkClose(), ExternalLinkClose()] + +--- + +name: external_link_inside_wikilink +label: an external link inside a wikilink, valid in the case of images (see issue #62) +input: "[[File:Example.png|thumb|http://example.com]]" 
+output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), WikilinkClose()] + +--- + +name: external_link_inside_wikilink_brackets +label: an external link with brackets inside a wikilink +input: "[[File:Example.png|thumb|[http://example.com Example]]]" +output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose(), WikilinkClose()] + +--- + +name: external_link_inside_wikilink_title +label: an external link inside a wikilink title, which is invalid +input: "[[File:Example.png http://example.com]]" +output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] diff --git a/tests/tokenizer/wikilinks.mwtest b/tests/tokenizer/wikilinks.mwtest index 8eb381a..ce0ec79 100644 --- a/tests/tokenizer/wikilinks.mwtest +++ b/tests/tokenizer/wikilinks.mwtest @@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b --- +name: nested +label: a wikilink nested within another +input: "[[foo|[[bar]]]]" +output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()] + +--- + +name: nested_padding +label: a wikilink nested within another, separated by other data +input: "[[foo|a[[b]]c]]" +output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()] + +--- + name: invalid_newline label: invalid wikilink: newline as only content input: "[[\n]]" @@ -103,27 +117,13 @@ output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), --- -name: invalid_nested_text -label: invalid wikilink: a wikilink nested within the value of another +name: 
invalid_nested_no_close +label: invalid wikilink: a wikilink nested within the value of another, missing a pair of closing brackets input: "[[foo|[[bar]]" output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose()] --- -name: invalid_nested_text_2 -label: invalid wikilink: a wikilink nested within the value of another, two pairs of closing brackets -input: "[[foo|[[bar]]]]" -output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")] - ---- - -name: invalid_nested_text_padding -label: invalid wikilink: a wikilink nested within the value of another, separated by other data -input: "[[foo|a[[b]]c]]" -output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")] - ---- - name: incomplete_open_only label: incomplete wikilinks: just an open input: "[[" From cf7ab558bfd2f6b0bff2a9493e7e6f254ae9d341 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Mar 2014 00:58:37 -0400 Subject: [PATCH 33/39] Make Wikicode.get_sections() much faster (closes #63). Patch mostly by @spointy. --- CHANGELOG | 4 +- docs/changelog.rst | 4 +- mwparserfromhell/wikicode.py | 119 ++++++++++++++++++++++++++++--------------- 3 files changed, 83 insertions(+), 44 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2c94ebc..a396f82 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,8 +6,8 @@ v0.4 (unreleased): is a breaking change if you rely on the default behavior. - The 'matches' argument of Wikicode's filter methods now accepts a function (taking one argument, a Node, and returning a bool) in addition to a regex. -- Re-added 'flat' argument to Wikicode.get_sections(). -- Wikicode.get_sections() now returns sections in the correct order. +- Re-added 'flat' argument to Wikicode.get_sections(), fixed the order in which + it returns sections, and made it faster. - Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects instead of just a single string or Wikicode. 
- Given the frequency of issues with the (admittedly insufficient) tag parser, diff --git a/docs/changelog.rst b/docs/changelog.rst index 6e1ce47..82f06c4 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,8 +15,8 @@ Unreleased - The *matches* argument of :py:class:`Wikicode's <.Wikicode>` :py:meth:`.filter` methods now accepts a function (taking one argument, a :py:class:`.Node`, and returning a bool) in addition to a regex. -- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`. -- :py:meth:`.Wikicode.get_sections` now returns sections in the correct order. +- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`, fixed the + order in which it returns sections, and made it faster. - :py:meth:`.Wikicode.matches` now accepts a tuple or list of strings/:py:class:`.Wikicode` objects instead of just a single string or :py:class:`.Wikicode`. diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 312f3a0..4f5cd1a 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -68,6 +68,41 @@ class Wikicode(StringMixIn): substring = "".join(nodes).replace(old, new) code.nodes[index] = parse_anything(substring).nodes + @staticmethod + def _build_matcher(matches, flags): + """Helper for :py:meth:`_indexed_ifilter` and others. + + If *matches* is a function, return it. If it's a regex, return a + wrapper around it that can be called with a node to do a search. If + it's ``None``, return a function that always returns ``True``. + """ + if matches: + if callable(matches): + return matches + return lambda obj: re.search(matches, str(obj), flags) # r + return lambda obj: True + + def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, + forcetype=None): + """Iterate over nodes and their corresponding indices in the node list. + + The arguments are interpreted as for :py:meth:`ifilter`. For each tuple + ``(i, node)`` yielded by this method, ``self.index(node) == i``. 
Note + that if *recursive* is ``True``, ``self.nodes[i]`` might not be the + node itself, but will still contain it. + """ + match = self._build_matcher(matches, flags) + if recursive: + def getter(i, node): + for ch in self._get_children(node): + yield (i, ch) + inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) + else: + inodes = enumerate(self.nodes) + for i, node in inodes: + if (not forcetype or isinstance(node, forcetype)) and match(node): + yield (i, node) + def _do_strong_search(self, obj, recursive=True): """Search for the specific element *obj* within the node list. @@ -411,17 +446,8 @@ class Wikicode(StringMixIn): :py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can be specified by passing *flags*. """ - if matches and not callable(matches): - pat, matches = matches, lambda obj: re.search(pat, str(obj), flags) - if recursive: - getter = self._get_children - nodes = chain.from_iterable(getter(n) for n in self.nodes) - else: - nodes = self.nodes - for node in nodes: - if not forcetype or isinstance(node, forcetype): - if not matches or matches(node): - yield node + return (node for i, node in + self._indexed_ifilter(recursive, matches, flags, forcetype)) def filter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None): @@ -442,10 +468,10 @@ class Wikicode(StringMixIn): Each section contains all of its subsections, unless *flat* is ``True``. If *levels* is given, it should be a iterable of integers; only sections whose heading levels are within it will be returned. If - *matches* is given, it should be a regex to be matched against the - titles of section headings; only sections whose headings match the - regex will be included. *flags* can be used to override the default - regex flags (see :py:meth:`ifilter`) if *matches* is used. + *matches* is given, it should be either a function or a regex; only + sections whose headings match it (without the surrounding equal signs) + will be included. 
*flags* can be used to override the default regex + flags (see :py:meth:`ifilter`) if a regex *matches* is used. If *include_lead* is ``True``, the first, lead section (without a heading) will be included in the list; ``False`` will not include it; @@ -454,35 +480,48 @@ class Wikicode(StringMixIn): :py:class:`~.Heading` object will be included; otherwise, this is skipped. """ - if matches: - matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter_headings(recursive=False, matches=matches, - flags=flags) - if levels: - headings = [head for head in headings if head.level in levels] - - sections = [] + title_matcher = self._build_matcher(matches, flags) + matcher = lambda heading: (title_matcher(heading.title) and + (not levels or heading.level in levels)) + iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading) + sections = [] # Tuples of (index_of_first_node, section) + open_headings = [] # Tuples of (index, heading), where index and + # heading.level are both monotonically increasing + + # Add the lead section if appropriate: if include_lead or not (include_lead is not None or matches or levels): - iterator = self.ifilter_headings(recursive=False) + itr = self._indexed_ifilter(recursive=False, forcetype=Heading) try: - first = self.index(next(iterator)) - sections.append(Wikicode(self.nodes[:first])) + first = next(itr)[0] + sections.append((0, Wikicode(self.nodes[:first]))) except StopIteration: # No headings in page - sections.append(Wikicode(self.nodes[:])) - - for heading in headings: - start = self.index(heading) - i = start + 1 - if not include_headings: - start += 1 - while i < len(self.nodes): - node = self.nodes[i] - if isinstance(node, Heading): - if flat or node.level <= heading.level: + sections.append((0, Wikicode(self.nodes[:]))) + + # Iterate over headings, adding sections to the list as they end: + for i, heading in iheadings: + if flat: # With flat, all sections close at the next heading + newly_closed, open_headings = 
open_headings, [] + else: # Otherwise, figure out which sections have closed, if any + closed_start_index = len(open_headings) + for j, (start, last_heading) in enumerate(open_headings): + if heading.level <= last_heading.level: + closed_start_index = j break - i += 1 - sections.append(Wikicode(self.nodes[start:i])) - return sections + newly_closed = open_headings[closed_start_index:] + del open_headings[closed_start_index:] + for start, closed_heading in newly_closed: + if matcher(closed_heading): + sections.append((start, Wikicode(self.nodes[start:i]))) + start = i if include_headings else (i + 1) + open_headings.append((start, heading)) + + # Add any remaining open headings to the list of sections: + for start, heading in open_headings: + if matcher(heading): + sections.append((start, Wikicode(self.nodes[start:]))) + + # Ensure that earlier sections are earlier in the returned list: + return [section for i, section in sorted(sections)] def strip_code(self, normalize=True, collapse=True): """Return a rendered string without unprintable code such as templates. From f616e6b3b76ab5d9fb18fdc869129c34d8870c17 Mon Sep 17 00:00:00 2001 From: Marcio Faustino Date: Thu, 20 Mar 2014 08:52:47 +0100 Subject: [PATCH 34/39] Avoid custom constructor and sub-class `dict` directly instead. 
Before: $ python -m timeit -s 'from mwparserfromhell.parser.tokens import Text' 'Text(text = "abc")' 1000000 loops, best of 3: 0.91 usec per loop After: $ python -m timeit -s 'from mwparserfromhell.parser.tokens import Text' 'Text(text = "abc")' 1000000 loops, best of 3: 0.223 usec per loop --- mwparserfromhell/parser/tokens.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index a152abe..40e5158 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -34,15 +34,12 @@ from ..compat import py3k, str __all__ = ["Token"] -class Token(object): +class Token (dict): """A token stores the semantic meaning of a unit of wikicode.""" - def __init__(self, **kwargs): - super(Token, self).__setattr__("_kwargs", kwargs) - def __repr__(self): args = [] - for key, value in self._kwargs.items(): + for key, value in self.items(): if isinstance(value, str) and len(value) > 100: args.append(key + "=" + repr(value[:97] + "...")) else: @@ -50,18 +47,19 @@ class Token(object): return "{0}({1})".format(type(self).__name__, ", ".join(args)) def __eq__(self, other): - if isinstance(other, type(self)): - return self._kwargs == other._kwargs - return False + return isinstance(other, type(self)) and dict.__eq__(self, other) + + def __ne__(self, other): + return not self.__eq__(other) def __getattr__(self, key): - return self._kwargs.get(key) + return self.get(key) def __setattr__(self, key, value): - self._kwargs[key] = value + self[key] = value def __delattr__(self, key): - del self._kwargs[key] + del self[key] def make(name): From 40e859e65f7bac781fa841dba3da542e580ac1f7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Mar 2014 02:02:54 -0400 Subject: [PATCH 35/39] Add Python 3.4 to .travis.yml. 
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 31090f2..5fe3760 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,5 +4,6 @@ python: - "2.7" - "3.2" - "3.3" + - "3.4" install: python setup.py build script: python setup.py test -q From fa89cd8da81cbe56a85da43cfe4129a96d1bccfe Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Mar 2014 02:10:03 -0400 Subject: [PATCH 36/39] Python 3.4 is good. --- CHANGELOG | 2 +- docs/changelog.rst | 2 +- setup.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a396f82..b4d810c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,6 @@ v0.4 (unreleased): -- Added support for Python 2.6. +- Added support for Python 2.6 and 3.4. - Template.has() is now passed 'ignore_empty=False' by default instead of True. This fixes a bug when adding parameters to templates with empty fields, and is a breaking change if you rely on the default behavior. diff --git a/docs/changelog.rst b/docs/changelog.rst index 82f06c4..4a52f33 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,7 +7,7 @@ v0.4 Unreleased (`changes `__): -- Added support for Python 2.6. +- Added support for Python 2.6 and 3.4. - :py:meth:`.Template.has` is now passed *ignore_empty=False* by default instead of *True*. This fixes a bug when adding parameters to templates with empty fields, **and is a breaking change if you rely on the default diff --git a/setup.py b/setup.py index eaccdb2..5a45902 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ setup( "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", "Topic :: Text Processing :: Markup" ], ) From 81fddd5e84908e9e23f94c6f78b8652215edc91d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 11 Apr 2014 22:12:43 -0400 Subject: [PATCH 37/39] Apparently Travis is still kinda lame. 
--- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5fe3760..31090f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,5 @@ python: - "2.7" - "3.2" - "3.3" - - "3.4" install: python setup.py build script: python setup.py test -q From 454890ab0db70379303c7ea7e4b738cf291425b3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 16 Apr 2014 17:23:03 -0400 Subject: [PATCH 38/39] Fix docstring for Wikicode.strip_code(). --- mwparserfromhell/wikicode.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 4f5cd1a..44515a6 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -527,11 +527,11 @@ class Wikicode(StringMixIn): """Return a rendered string without unprintable code such as templates. The way a node is stripped is handled by the - :py:meth:`~.Node.__showtree__` method of :py:class:`~.Node` objects, - which generally return a subset of their nodes or ``None``. For - example, templates and tags are removed completely, links are stripped - to just their display part, headings are stripped to just their title. - If *normalize* is ``True``, various things may be done to strip code + :py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which + generally return a subset of their nodes or ``None``. For example, + templates and tags are removed completely, links are stripped to just + their display part, headings are stripped to just their title. If + *normalize* is ``True``, various things may be done to strip code further, such as converting HTML entities like ``&Sigma;``, ``&#931;``, and ``&#x3a3;`` to ``Σ``.
If *collapse* is ``True``, we will try to remove excess whitespace as well (three or more newlines are converted From 9953fd55850beec02741f7b9249e2865f002b99c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 22 Apr 2014 16:15:31 -0400 Subject: [PATCH 39/39] release/0.3.3 --- CHANGELOG | 2 +- docs/changelog.rst | 12 ++++++------ mwparserfromhell/__init__.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b4d810c..9faf6b7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -v0.4 (unreleased): +v0.3.3 (released April 22, 2014): - Added support for Python 2.6 and 3.4. - Template.has() is now passed 'ignore_empty=False' by default instead of True. diff --git a/docs/changelog.rst b/docs/changelog.rst index 4a52f33..9efc022 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,11 +1,11 @@ Changelog ========= -v0.4 ----- +v0.3.3 +------ -Unreleased -(`changes `__): +`Released April 22, 2014 `_ +(`changes `__): - Added support for Python 2.6 and 3.4. - :py:meth:`.Template.has` is now passed *ignore_empty=False* by default @@ -22,8 +22,8 @@ Unreleased :py:class:`.Wikicode`. - Given the frequency of issues with the (admittedly insufficient) tag parser, there's a temporary *skip_style_tags* argument to - :py:meth:`~mwparserfromhell.parse` that ignores ``''`` and ``'''`` until - these issues are corrected. + :py:meth:`~.Parser.parse` that ignores ``''`` and ``'''`` until these issues + are corrected. - Fixed a parser bug involving nested wikilinks and external links. - C code cleanup and speed improvements. 
diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index e7459e3..469e9a6 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.4.dev" +__version__ = "0.3.3" __email__ = "ben.kurtovic@gmail.com" from . import (compat, definitions, nodes, parser, smart_list, string_mixin,