@@ -1,6 +1,6 @@ | |||||
v0.5 (unreleased): | v0.5 (unreleased): | ||||
- | |||||
- Fixed edge cases involving wikilinks inside of external links and vice versa. | |||||
v0.4.2 (released July 30, 2015): | v0.4.2 (released July 30, 2015): | ||||
@@ -7,7 +7,7 @@ v0.5 | |||||
Unreleased | Unreleased | ||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.2...develop>`__): | (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.2...develop>`__): | ||||
- | |||||
- Fixed edge cases involving wikilinks inside of external links and vice versa. | |||||
v0.4.2 | v0.4.2 | ||||
------ | ------ | ||||
@@ -47,6 +47,8 @@ typedef struct { | |||||
/* Forward declarations */ | /* Forward declarations */ | ||||
static PyObject* Tokenizer_really_parse_external_link( | |||||
Tokenizer*, int, Textbuffer*); | |||||
static int Tokenizer_parse_entity(Tokenizer*); | static int Tokenizer_parse_entity(Tokenizer*); | ||||
static int Tokenizer_parse_comment(Tokenizer*); | static int Tokenizer_parse_comment(Tokenizer*); | ||||
static int Tokenizer_handle_dl_term(Tokenizer*); | static int Tokenizer_handle_dl_term(Tokenizer*); | ||||
@@ -362,30 +364,70 @@ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) | |||||
static int Tokenizer_parse_wikilink(Tokenizer* self) | static int Tokenizer_parse_wikilink(Tokenizer* self) | ||||
{ | { | ||||
Py_ssize_t reset; | Py_ssize_t reset; | ||||
PyObject *wikilink; | |||||
PyObject *extlink, *wikilink, *kwargs; | |||||
reset = self->head + 1; | |||||
self->head += 2; | self->head += 2; | ||||
reset = self->head - 1; | |||||
wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE, 1); | |||||
// If the wikilink looks like an external link, parse it as such: | |||||
extlink = Tokenizer_really_parse_external_link(self, 1, NULL); | |||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
self->head = reset + 1; | |||||
// Otherwise, actually parse it as a wikilink: | |||||
wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE, 1); | |||||
if (BAD_ROUTE) { | |||||
RESET_ROUTE(); | |||||
self->head = reset; | |||||
if (Tokenizer_emit_text(self, "[[")) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
if (!wikilink) | |||||
return -1; | |||||
if (Tokenizer_emit(self, WikilinkOpen)) { | |||||
Py_DECREF(wikilink); | |||||
return -1; | |||||
} | |||||
if (Tokenizer_emit_all(self, wikilink)) { | |||||
Py_DECREF(wikilink); | |||||
return -1; | |||||
} | |||||
Py_DECREF(wikilink); | |||||
if (Tokenizer_emit(self, WikilinkClose)) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
if (!extlink) | |||||
return -1; | |||||
if (self->topstack->context & LC_EXT_LINK_TITLE) { | |||||
// In this exceptional case, an external link that looks like a | |||||
// wikilink inside of an external link is parsed as text: | |||||
Py_DECREF(extlink); | |||||
self->head = reset; | self->head = reset; | ||||
if (Tokenizer_emit_text(self, "[[")) | if (Tokenizer_emit_text(self, "[[")) | ||||
return -1; | return -1; | ||||
return 0; | return 0; | ||||
} | } | ||||
if (!wikilink) | |||||
if (Tokenizer_emit_text(self, "[")) { | |||||
Py_DECREF(extlink); | |||||
return -1; | return -1; | ||||
if (Tokenizer_emit(self, WikilinkOpen)) { | |||||
Py_DECREF(wikilink); | |||||
} | |||||
kwargs = PyDict_New(); | |||||
if (!kwargs) { | |||||
Py_DECREF(extlink); | |||||
return -1; | return -1; | ||||
} | } | ||||
if (Tokenizer_emit_all(self, wikilink)) { | |||||
Py_DECREF(wikilink); | |||||
PyDict_SetItemString(kwargs, "brackets", Py_True); | |||||
if (Tokenizer_emit_kwargs(self, ExternalLinkOpen, kwargs)) { | |||||
Py_DECREF(extlink); | |||||
return -1; | |||||
} | |||||
if (Tokenizer_emit_all(self, extlink)) { | |||||
Py_DECREF(extlink); | |||||
return -1; | return -1; | ||||
} | } | ||||
Py_DECREF(wikilink); | |||||
if (Tokenizer_emit(self, WikilinkClose)) | |||||
Py_DECREF(extlink); | |||||
if (Tokenizer_emit(self, ExternalLinkClose)) | |||||
return -1; | return -1; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -553,7 +595,7 @@ static int Tokenizer_handle_free_link_text( | |||||
Tokenizer* self, int* parens, Textbuffer* tail, Unicode this) | Tokenizer* self, int* parens, Textbuffer* tail, Unicode this) | ||||
{ | { | ||||
#define PUSH_TAIL_BUFFER(tail, error) \ | #define PUSH_TAIL_BUFFER(tail, error) \ | ||||
if (tail->length > 0) { \ | |||||
if (tail && tail->length > 0) { \ | |||||
if (Textbuffer_concat(self->topstack->textbuffer, tail)) \ | if (Textbuffer_concat(self->topstack->textbuffer, tail)) \ | ||||
return error; \ | return error; \ | ||||
if (Textbuffer_reset(tail)) \ | if (Textbuffer_reset(tail)) \ | ||||
@@ -299,17 +299,34 @@ class Tokenizer(object): | |||||
def _parse_wikilink(self): | def _parse_wikilink(self): | ||||
"""Parse an internal wikilink at the head of the wikicode string.""" | """Parse an internal wikilink at the head of the wikicode string.""" | ||||
reset = self._head + 1 | |||||
self._head += 2 | self._head += 2 | ||||
reset = self._head - 1 | |||||
try: | try: | ||||
wikilink = self._parse(contexts.WIKILINK_TITLE) | |||||
# If the wikilink looks like an external link, parse it as such: | |||||
link, extra, delta = self._really_parse_external_link(True) | |||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | |||||
self._emit_text("[[") | |||||
self._head = reset + 1 | |||||
try: | |||||
# Otherwise, actually parse it as a wikilink: | |||||
wikilink = self._parse(contexts.WIKILINK_TITLE) | |||||
except BadRoute: | |||||
self._head = reset | |||||
self._emit_text("[[") | |||||
else: | |||||
self._emit(tokens.WikilinkOpen()) | |||||
self._emit_all(wikilink) | |||||
self._emit(tokens.WikilinkClose()) | |||||
else: | else: | ||||
self._emit(tokens.WikilinkOpen()) | |||||
self._emit_all(wikilink) | |||||
self._emit(tokens.WikilinkClose()) | |||||
if self._context & contexts.EXT_LINK_TITLE: | |||||
# In this exceptional case, an external link that looks like a | |||||
# wikilink inside of an external link is parsed as text: | |||||
self._head = reset | |||||
self._emit_text("[[") | |||||
return | |||||
self._emit_text("[") | |||||
self._emit(tokens.ExternalLinkOpen(brackets=True)) | |||||
self._emit_all(link) | |||||
self._emit(tokens.ExternalLinkClose()) | |||||
def _handle_wikilink_separator(self): | def _handle_wikilink_separator(self): | ||||
"""Handle the separator between a wikilink's title and its text.""" | """Handle the separator between a wikilink's title and its text.""" | ||||
@@ -82,6 +82,13 @@ output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com"), Exter | |||||
--- | --- | ||||
name: brackets_recursive_2 | |||||
label: bracket-enclosed link with a double bracket-enclosed link as the title | |||||
input: "[http://example.com [[http://example.com]]]" | |||||
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="[[http://example.com"), ExternalLinkClose(), Text(text="]]")] | |||||
--- | |||||
name: period_after | name: period_after | ||||
label: a period after a free link that is excluded | label: a period after a free link that is excluded | ||||
input: "http://example.com." | input: "http://example.com." | ||||
@@ -175,7 +175,7 @@ output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Tex | |||||
--- | --- | ||||
name: external_link_inside_wikilink_title | name: external_link_inside_wikilink_title | ||||
label: an external link inside a wikilink title, which is invalid | |||||
label: an external link inside a wikilink title, which is not parsed | |||||
input: "[[File:Example.png http://example.com]]" | input: "[[File:Example.png http://example.com]]" | ||||
output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] | output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] | ||||
@@ -318,3 +318,17 @@ name: incomplete_comment_in_link_title_6 | |||||
label: incomplete comments are invalid in link titles | label: incomplete comments are invalid in link titles | ||||
input: "[[foo<!--bar" | input: "[[foo<!--bar" | ||||
output: [Text(text="[[foo<!--bar")] | output: [Text(text="[[foo<!--bar")] | ||||
--- | |||||
name: wikilink_to_external_link_fallback | |||||
label: an external link enclosed in an extra pair of brackets (see issue #120) | |||||
input: "[[http://example.com foo bar]]" | |||||
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="foo bar"), ExternalLinkClose(), Text(text="]")] | |||||
--- | |||||
name: wikilink_to_external_link_fallback_2 | |||||
label: an external link enclosed in an extra pair of brackets (see issue #120) | |||||
input: "[[http://example.com]]" | |||||
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")] |