From c7617c3eb3cf1865168a6566dc4dd1eb951a6c1b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 24 Aug 2013 20:19:11 -0400 Subject: [PATCH 1/9] Bump version to 0.4.dev; also update README and setup.py. --- README.rst | 6 ++++-- mwparserfromhell/__init__.py | 2 +- setup.py | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index b5fd912..5b4cfe1 100644 --- a/README.rst +++ b/README.rst @@ -9,8 +9,8 @@ mwparserfromhell that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 2 and Python 3. -Developed by Earwig_ with help from `Σ`_. Full documentation is available on -ReadTheDocs_. +Developed by Earwig_ with contributions from `Σ`_, Legoktm_, and others. +Full documentation is available on ReadTheDocs_. Development occurs on GitHub_. Installation ------------ @@ -148,6 +148,8 @@ following code (via the API_):: .. _ReadTheDocs: http://mwparserfromhell.readthedocs.org .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig .. _Σ: http://en.wikipedia.org/wiki/User:%CE%A3 +.. _Legoktm: http://en.wikipedia.org/wiki/User:Legoktm +.. _GitHub: https://github.com/earwig/mwparserfromhell .. _Python Package Index: http://pypi.python.org .. _StackOverflow question: http://stackoverflow.com/questions/2817869/error-unable-to-find-vcvarsall-bat .. _get pip: http://pypi.python.org/pypi/pip diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 6a45a11..3c011d0 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.3" +__version__ = "0.4.dev" __email__ = "ben.kurtovic@verizon.net" from . import (compat, definitions, nodes, parser, smart_list, string_mixin, diff --git a/setup.py b/setup.py index 3ef7e0e..d2ad17d 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ setup( "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Topic :: Text Processing :: Markup" ], From 0f1fca75d84fb676d53781705f66f9392f8c043c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 17:55:17 -0400 Subject: [PATCH 2/9] Typo fix. --- mwparserfromhell/wikicode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index c3249d9..08fd469 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -409,7 +409,7 @@ class Wikicode(StringMixIn): Each section contains all of its subsections. If *levels* is given, it should be a iterable of integers; only sections whose heading levels - are within it will be returned.If *matches* is given, it should be a + are within it will be returned. If *matches* is given, it should be a regex to be matched against the titles of section headings; only sections whose headings match the regex will be included. *flags* can be used to override the default regex flags (see :py:meth:`ifilter`) if From 7b50888e137954fc7c955e0f3bfe74b2f24b1462 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 18:26:50 -0400 Subject: [PATCH 3/9] Tests for external links inside other markup. 
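These tests cover bare URLs nested inside templates, arguments, headings, tags, and style markup. As a rough illustration (not part of the patch itself), this is how the second template case below is expected to surface through the public API once the tokenizer changes later in this series are applied; filter_external_links() and the url/brackets attributes are the existing ExternalLink API, and the sample wikitext is taken straight from the test cases that follow:

    import mwparserfromhell

    # A bare URL inside a template parameter should stop at the "|"
    # separator and become an ExternalLink node instead of plain text.
    code = mwparserfromhell.parse("{{URL|http://example.com|foobar}}")
    link = code.filter_external_links()[0]
    print(link.url)       # expected: http://example.com
    print(link.brackets)  # expected: False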
--- tests/tokenizer/integration.mwtest | 63 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 083b12c..82c84bd 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -72,3 +72,66 @@ name: link_inside_dl_2 label: an external link inside a def list, such that the external link is not parsed input: ";;;malito:example" output: [TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), TagOpenOpen(wiki_markup=";"), Text(text="dt"), TagCloseSelfclose(), Text(text="malito"), TagOpenOpen(wiki_markup=":"), Text(text="dd"), TagCloseSelfclose(), Text(text="example")] + +--- + +name: link_inside_template +label: an external link nested inside a template, before the end +input: "{{URL|http://example.com}}" +output: [TemplateOpen(), Text(text="URL"), TemplateParamSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TemplateClose()] + +--- + +name: link_inside_template_2 +label: an external link nested inside a template, before a separator +input: "{{URL|http://example.com|foobar}}" +output: [TemplateOpen(), Text(text="URL"), TemplateParamSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TemplateParamSeparator(), Text(text="foobar"), TemplateClose()] + +--- + +name: link_inside_template_3 +label: an external link nested inside a template, before an equal sign +input: "{{URL|http://example.com=foobar}}" +output: [TemplateOpen(), Text(text="URL"), TemplateParamSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TemplateParamEquals(), Text(text="foobar"), TemplateClose()] + +--- + +name: link_inside_argument +label: an external link nested inside an argument +input: "{{{URL|http://example.com}}}" +output: [ArgumentOpen(), Text(text="URL"), ArgumentSeparator(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), ArgumentClose()] + +--- + +name: link_inside_heading +label: an external link nested inside a heading +input: "==http://example.com==" +output: [HeadingStart(level=2), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), HeadingEnd()] + +--- + +name: link_inside_tag_body +label: an external link nested inside the body of a tag +input: "http://example.com" +output: [TagOpenOpen(), Text(text="ref"), TagCloseOpen(padding=""), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TagOpenClose(), Text(text="ref"), TagCloseClose()] + +--- + +name: link_inside_tag_style +label: an external link nested inside style tags +input: "''http://example.com''" +output: [TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), TagOpenClose(), Text(text="i"), TagCloseClose()] + +--- + +name: style_tag_inside_link +label: style tags disrupting an external link +input: "http://example.com/foo''bar''" +output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo"), ExternalLinkClose(), TagOpenOpen(wiki_markup="''"), Text(text="i"), TagCloseOpen(), Text(text="bar"), TagOpenClose(), Text(text="i"), TagCloseClose()] + +--- + +name: comment_inside_link +label: an HTML comment inside an external link +input: "http://example.com/foobar" +output: 
[ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()] From d6e03800faaa1652affbe8491f888fc18548dd21 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 18:58:55 -0400 Subject: [PATCH 4/9] Include tests for bracketed links too. --- tests/tokenizer/integration.mwtest | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/tokenizer/integration.mwtest b/tests/tokenizer/integration.mwtest index 82c84bd..37ef9f1 100644 --- a/tests/tokenizer/integration.mwtest +++ b/tests/tokenizer/integration.mwtest @@ -135,3 +135,18 @@ name: comment_inside_link label: an HTML comment inside an external link input: "http://example.com/foobar" output: [ExternalLinkOpen(brackets=False), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()] + +--- + +name: bracketed_link_inside_template +label: a bracketed external link nested inside a template, before the end +input: "{{URL|[http://example.com}}]" +output: [Text(text="{{URL|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com}}"), ExternalLinkClose()] + + +--- + +name: comment_inside_bracketed_link +label: an HTML comment inside a bracketed external link +input: "[http://example.com/foobar]" +output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()] From 565a04256f3634e4f0276e050f70e29faf643ddf Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 19:08:40 -0400 Subject: [PATCH 5/9] Proper sentinel handling with free links in the Python tokenizer. --- mwparserfromhell/parser/tokenizer.py | 38 +++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 8fae729..bcc00ef 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -315,7 +315,7 @@ class Tokenizer(object): def _parse_bracketed_uri_scheme(self): """Parse the URI scheme of a bracket-enclosed external link.""" - self._push(contexts.EXT_LINK_URI) + self._push(self._context | contexts.EXT_LINK_URI) if self._read() == self._read(1) == "/": self._emit_text("//") self._head += 2 @@ -358,7 +358,7 @@ class Tokenizer(object): slashes = self._read() == self._read(1) == "/" if not is_scheme(scheme, slashes): raise BadRoute() - self._push(contexts.EXT_LINK_URI) + self._push(self._context | contexts.EXT_LINK_URI) self._emit_text(scheme) self._emit_text(":") if slashes: @@ -385,6 +385,21 @@ class Tokenizer(object): self._emit_text(this) return punct, tail + def _is_free_link_end(self, this, next): + """Return whether the current head is the end of a free link.""" + # Built from _parse()'s end sentinels: + after, ctx = self._read(2), self._context + return (this is self.END or this in ("\n", "[", "]") or + this == "|" and ctx & contexts.TEMPLATE or + this == "=" and ctx & contexts.TEMPLATE_PARAM_KEY or + this == next == "}" and ctx & contexts.TEMPLATE or + this == next == after == "}" and ctx & contexts.ARGUMENT or + this == "=" and ctx & contexts.HEADING or + this == "<" and next == "/" and after is not self.END or + this == "<" and next != "!" 
and not ctx & contexts.TAG_CLOSE or + this == ">" and ctx & contexts.TAG_CLOSE or + this == next == "'") + def _really_parse_external_link(self, brackets): """Really parse an external link.""" if brackets: @@ -399,27 +414,28 @@ class Tokenizer(object): tail = "" while True: this, next = self._read(), self._read(1) - if this is self.END or this == "\n": - if brackets: - self._fail_route() + if not brackets and self._is_free_link_end(this, next): return self._pop(), tail, -1 + elif this is self.END or this == "\n": + self._fail_route() elif this == next == "{" and self._can_recurse(): if tail: self._emit_text(tail) tail = "" self._parse_template_or_argument() - elif this == "[": - if brackets: - self._emit_text("[") - else: - return self._pop(), tail, -1 elif this == "]": - return self._pop(), tail, 0 if brackets else -1 + return self._pop(), tail, 0 elif this == "&": if tail: self._emit_text(tail) tail = "" self._parse_entity() + elif (this == "<" and next == "!" and self._read(2) == + self._read(3) == "-"): + if tail: + self._emit_text(tail) + tail = "" + self._parse_comment() elif " " in this: before, after = this.split(" ", 1) if brackets: From 1bf9868753aaf3dcab715a1cf43d8ac0c94678d9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 21:02:49 -0400 Subject: [PATCH 6/9] Proper sentinel handling with free links in the C tokenizer. --- mwparserfromhell/parser/tokenizer.c | 55 +++++++++++++++++++++++++------------ mwparserfromhell/parser/tokenizer.h | 1 + 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index c9527ab..e0a2adb 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -870,7 +870,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) Py_UNICODE this; int slashes, i; - if (Tokenizer_push(self, LC_EXT_LINK_URI)) + if (Tokenizer_push(self, self->topstack->context | LC_EXT_LINK_URI)) return -1; if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") { if (Tokenizer_emit_text(self, "//")) @@ -982,7 +982,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) return 0; } Py_DECREF(scheme); - if (Tokenizer_push(self, LC_EXT_LINK_URI)) { + if (Tokenizer_push(self, self->topstack->context | LC_EXT_LINK_URI)) { Textbuffer_dealloc(scheme_buffer); return -1; } @@ -1028,6 +1028,29 @@ Tokenizer_handle_free_link_text(Tokenizer* self, int* parens, } /* + Return whether the current head is the end of a free link. +*/ +static int +Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) +{ + // Built from Tokenizer_parse()'s end sentinels: + Py_UNICODE after = Tokenizer_READ(self, 2); + int ctx = self->topstack->context; + + return ((this == *"" || this == *"\n" || this == *"[" || this == *"]") || + (this == *"|" && ctx & LC_TEMPLATE) || + (this == *"=" && ctx & LC_TEMPLATE_PARAM_KEY) || + (this == *"}" && next == *"}" && ctx & LC_TEMPLATE) || + (this == *"}" && next == *"}" && after == *"}" + && ctx & LC_ARGUMENT) || + (this == *"=" && ctx & LC_HEADING) || + (this == *"<" && next == *"/" && after != *"") || + (this == *"<" && next != *"!" && !(ctx & LC_TAG_CLOSE)) || + (this == *">" && ctx & LC_TAG_CLOSE) || + (this == *"'" && next == *"'")); +} + +/* Really parse an external link. 
*/ static PyObject* @@ -1050,35 +1073,31 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, while (1) { this = Tokenizer_READ(self, 0); next = Tokenizer_READ(self, 1); - if (this == *"" || this == *"\n") { - if (brackets) - return Tokenizer_fail_route(self); + if (!brackets && Tokenizer_is_free_link(self, this, next)) { self->head--; return Tokenizer_pop(self); } - if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) { + else if (this == *"" || this == *"\n") + return Tokenizer_fail_route(self); + else if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_template_or_argument(self)) return NULL; } - else if (this == *"[") { - if (!brackets) { - self->head--; - return Tokenizer_pop(self); - } - if (Tokenizer_emit_char(self, *"[")) - return NULL; - } - else if (this == *"]") { - if (!brackets) - self->head--; + else if (this == *"]") return Tokenizer_pop(self); - } else if (this == *"&") { PUSH_TAIL_BUFFER(*extra, NULL) if (Tokenizer_parse_entity(self)) return NULL; } + else if (this == *"<" && next == *"!" + && Tokenizer_READ(self, 2) == *"-" + && Tokenizer_READ(self, 3) == *"-") { + PUSH_TAIL_BUFFER(*extra, NULL) + if (Tokenizer_parse_comment(self)) + return NULL; + } else if (this == *" ") { if (brackets) { if (Tokenizer_emit(self, ExternalLinkSeparator)) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index da3c57a..48bdf26 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -261,6 +261,7 @@ static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); static void Tokenizer_dealloc(Tokenizer*); static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); static int Tokenizer_parse_entity(Tokenizer*); +static int Tokenizer_parse_comment(Tokenizer*); static int Tokenizer_handle_dl_term(Tokenizer*); static int Tokenizer_parse_tag(Tokenizer*); static PyObject* Tokenizer_parse(Tokenizer*, int, int); From 287bf71158e32962f358b8783c3164ba58b0448b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 23:02:22 -0400 Subject: [PATCH 7/9] Condense code. --- mwparserfromhell/parser/tokenizer.c | 43 ++++++++++++++++-------------------- mwparserfromhell/parser/tokenizer.py | 41 ++++++++++++++++------------------ 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index e0a2adb..aa1c00b 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1037,17 +1037,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) Py_UNICODE after = Tokenizer_READ(self, 2); int ctx = self->topstack->context; - return ((this == *"" || this == *"\n" || this == *"[" || this == *"]") || - (this == *"|" && ctx & LC_TEMPLATE) || - (this == *"=" && ctx & LC_TEMPLATE_PARAM_KEY) || - (this == *"}" && next == *"}" && ctx & LC_TEMPLATE) || - (this == *"}" && next == *"}" && after == *"}" - && ctx & LC_ARGUMENT) || - (this == *"=" && ctx & LC_HEADING) || - (this == *"<" && next == *"/" && after != *"") || - (this == *"<" && next != *"!" 
&& !(ctx & LC_TAG_CLOSE)) || - (this == *">" && ctx & LC_TAG_CLOSE) || - (this == *"'" && next == *"'")); + return (this == *"" || this == *"\n" || this == *"[" || this == *"]" || + this == *"<" || this == *">" || (this == *"'" && next == *"'") || + (this == *"|" && ctx & LC_TEMPLATE) || + (this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || + (this == *"}" && next == *"}" && + (ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT)))); } /* @@ -1073,7 +1068,19 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, while (1) { this = Tokenizer_READ(self, 0); next = Tokenizer_READ(self, 1); - if (!brackets && Tokenizer_is_free_link(self, this, next)) { + if (this == *"&") { + PUSH_TAIL_BUFFER(*extra, NULL) + if (Tokenizer_parse_entity(self)) + return NULL; + } + else if (this == *"<" && next == *"!" + && Tokenizer_READ(self, 2) == *"-" + && Tokenizer_READ(self, 3) == *"-") { + PUSH_TAIL_BUFFER(*extra, NULL) + if (Tokenizer_parse_comment(self)) + return NULL; + } + else if (!brackets && Tokenizer_is_free_link(self, this, next)) { self->head--; return Tokenizer_pop(self); } @@ -1086,18 +1093,6 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, } else if (this == *"]") return Tokenizer_pop(self); - else if (this == *"&") { - PUSH_TAIL_BUFFER(*extra, NULL) - if (Tokenizer_parse_entity(self)) - return NULL; - } - else if (this == *"<" && next == *"!" - && Tokenizer_READ(self, 2) == *"-" - && Tokenizer_READ(self, 3) == *"-") { - PUSH_TAIL_BUFFER(*extra, NULL) - if (Tokenizer_parse_comment(self)) - return NULL; - } else if (this == *" ") { if (brackets) { if (Tokenizer_emit(self, ExternalLinkSeparator)) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index bcc00ef..2c28718 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -389,16 +389,13 @@ class Tokenizer(object): """Return whether the current head is the end of a free link.""" # Built from _parse()'s end sentinels: after, ctx = self._read(2), self._context - return (this is self.END or this in ("\n", "[", "]") or - this == "|" and ctx & contexts.TEMPLATE or - this == "=" and ctx & contexts.TEMPLATE_PARAM_KEY or - this == next == "}" and ctx & contexts.TEMPLATE or - this == next == after == "}" and ctx & contexts.ARGUMENT or - this == "=" and ctx & contexts.HEADING or - this == "<" and next == "/" and after is not self.END or - this == "<" and next != "!" 
and not ctx & contexts.TAG_CLOSE or - this == ">" and ctx & contexts.TAG_CLOSE or - this == next == "'") + equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING + return (this in (self.END, "\n", "[", "]", "<", ">") or + this == next == "'" or + (this == "|" and ctx & contexts.TEMPLATE) or + (this == "=" and ctx & equal_sign_contexts) or + (this == next == "}" and ctx & contexts.TEMPLATE) or + (this == next == after == "}" and ctx & contexts.ARGUMENT)) def _really_parse_external_link(self, brackets): """Really parse an external link.""" @@ -414,18 +411,7 @@ class Tokenizer(object): tail = "" while True: this, next = self._read(), self._read(1) - if not brackets and self._is_free_link_end(this, next): - return self._pop(), tail, -1 - elif this is self.END or this == "\n": - self._fail_route() - elif this == next == "{" and self._can_recurse(): - if tail: - self._emit_text(tail) - tail = "" - self._parse_template_or_argument() - elif this == "]": - return self._pop(), tail, 0 - elif this == "&": + if this == "&": if tail: self._emit_text(tail) tail = "" @@ -436,6 +422,17 @@ class Tokenizer(object): self._emit_text(tail) tail = "" self._parse_comment() + elif not brackets and self._is_free_link_end(this, next): + return self._pop(), tail, -1 + elif this is self.END or this == "\n": + self._fail_route() + elif this == next == "{" and self._can_recurse(): + if tail: + self._emit_text(tail) + tail = "" + self._parse_template_or_argument() + elif this == "]": + return self._pop(), tail, 0 elif " " in this: before, after = this.split(" ", 1) if brackets: From 951a8737a5a3adc73d5016ff3b25e23568944c13 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 28 Aug 2013 23:33:58 -0400 Subject: [PATCH 8/9] Don't pass underlying context if this is a bracketed link. --- mwparserfromhell/parser/tokenizer.c | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index aa1c00b..609a595 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -870,7 +870,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) Py_UNICODE this; int slashes, i; - if (Tokenizer_push(self, self->topstack->context | LC_EXT_LINK_URI)) + if (Tokenizer_push(self, LC_EXT_LINK_URI)) return -1; if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") { if (Tokenizer_emit_text(self, "//")) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 2c28718..eb4c571 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -315,7 +315,7 @@ class Tokenizer(object): def _parse_bracketed_uri_scheme(self): """Parse the URI scheme of a bracket-enclosed external link.""" - self._push(self._context | contexts.EXT_LINK_URI) + self._push(contexts.EXT_LINK_URI) if self._read() == self._read(1) == "/": self._emit_text("//") self._head += 2 From 8189e0c1435c8ef01afb6398387d8d32c6b49f5a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 29 Aug 2013 00:13:11 -0400 Subject: [PATCH 9/9] release/0.3.1 --- CHANGELOG | 5 +++++ docs/changelog.rst | 9 +++++++++ mwparserfromhell/__init__.py | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 67214fa..4663700 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +v0.3.1 (released August 29, 2013): + +- Fixed a parser bug involving URLs nested inside other markup. +- Fixed some typos. 
+ v0.3 (released August 24, 2013): - Added complete support for HTML Tags, including forms like foo, diff --git a/docs/changelog.rst b/docs/changelog.rst index b6db9d9..3546f0c 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,15 @@ Changelog ========= +v0.3.1 +------ + +`Released August 29, 2013 `_ +(`changes `__): + +- Fixed a parser bug involving URLs nested inside other markup. +- Fixed some typos. + v0.3 ---- diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 3c011d0..a5fda7c 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.4.dev" +__version__ = "0.3.1" __email__ = "ben.kurtovic@verizon.net" from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
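
A quick sanity check for the 0.3.1 release as a whole — a minimal sketch, assuming the package built from this series is installed. It exercises two of the cases covered by the new integration tests (doubled apostrophes terminating a free link, and an HTML comment inside a bracketed link), using only the existing public API:

    import mwparserfromhell

    print(mwparserfromhell.__version__)   # expected: 0.3.1

    # Style markup ('') now terminates a free link.
    code = mwparserfromhell.parse("http://example.com/foo''bar''")
    print(code.filter_external_links()[0].url)   # expected: http://example.com/foo

    # An HTML comment inside a bracketed link stays part of the URL node.
    code = mwparserfromhell.parse("[http://example.com/foo<!--comment-->bar]")
    link = code.filter_external_links()[0]
    print(link.brackets)   # expected: True
    print(link.url)        # expected: http://example.com/foo<!--comment-->bar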