@@ -1,6 +1,6 @@ | |||
v0.5 (unreleased): | |||
- | |||
- Fixed edge cases involving wikilinks inside of external links and vice versa. | |||
v0.4.2 (released July 30, 2015): | |||
@@ -7,7 +7,7 @@ v0.5 | |||
Unreleased | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.2...develop>`__): | |||
- | |||
- Fixed edge cases involving wikilinks inside of external links and vice versa. | |||
v0.4.2 | |||
------ | |||
@@ -47,6 +47,8 @@ typedef struct { | |||
/* Forward declarations */ | |||
static PyObject* Tokenizer_really_parse_external_link( | |||
Tokenizer*, int, Textbuffer*); | |||
static int Tokenizer_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_comment(Tokenizer*); | |||
static int Tokenizer_handle_dl_term(Tokenizer*); | |||
@@ -362,30 +364,70 @@ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) | |||
/*
    Parse an internal wikilink at the head of the wikicode string.

    The text following "[[" is first tried as a bracket-enclosed external
    link. If that route fails, it is parsed as a regular wikilink title, and
    if that fails as well the "[[" is emitted as plain text.

    Returns 0 on success (including the plain-text fallbacks) and -1 if any
    emit or allocation step reports an error.
*/
static int Tokenizer_parse_wikilink(Tokenizer* self)
{
    Py_ssize_t reset;
    PyObject *extlink, *wikilink, *kwargs;

    // Remember the position of the second "[" so every fallback below can
    // rewind relative to it.
    reset = self->head + 1;
    self->head += 2;

    // If the wikilink looks like an external link, parse it as such:
    extlink = Tokenizer_really_parse_external_link(self, 1, NULL);
    if (BAD_ROUTE) {
        RESET_ROUTE();
        self->head = reset + 1;
        // Otherwise, actually parse it as a wikilink:
        wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE, 1);
        if (BAD_ROUTE) {
            // Neither interpretation worked; treat the brackets as text.
            RESET_ROUTE();
            self->head = reset;
            if (Tokenizer_emit_text(self, "[["))
                return -1;
            return 0;
        }
        if (!wikilink)
            return -1;
        if (Tokenizer_emit(self, WikilinkOpen)) {
            Py_DECREF(wikilink);
            return -1;
        }
        if (Tokenizer_emit_all(self, wikilink)) {
            Py_DECREF(wikilink);
            return -1;
        }
        Py_DECREF(wikilink);
        if (Tokenizer_emit(self, WikilinkClose))
            return -1;
        return 0;
    }
    // A NULL result without a bad route is a genuine error.
    if (!extlink)
        return -1;
    if (self->topstack->context & LC_EXT_LINK_TITLE) {
        // In this exceptional case, an external link that looks like a
        // wikilink inside of an external link is parsed as text:
        Py_DECREF(extlink);
        self->head = reset;
        if (Tokenizer_emit_text(self, "[["))
            return -1;
        return 0;
    }
    // The outer pair of brackets is not part of the external link; the
    // leading one is emitted as literal text here.
    if (Tokenizer_emit_text(self, "[")) {
        Py_DECREF(extlink);
        return -1;
    }
    kwargs = PyDict_New();
    if (!kwargs) {
        Py_DECREF(extlink);
        return -1;
    }
    // PyDict_SetItemString() can fail; bail out instead of emitting a token
    // with incomplete keyword arguments.
    if (PyDict_SetItemString(kwargs, "brackets", Py_True)) {
        Py_DECREF(kwargs);
        Py_DECREF(extlink);
        return -1;
    }
    // NOTE(review): kwargs is not released after this call on either path;
    // presumably Tokenizer_emit_kwargs() takes ownership of it - confirm.
    if (Tokenizer_emit_kwargs(self, ExternalLinkOpen, kwargs)) {
        Py_DECREF(extlink);
        return -1;
    }
    if (Tokenizer_emit_all(self, extlink)) {
        Py_DECREF(extlink);
        return -1;
    }
    Py_DECREF(extlink);
    if (Tokenizer_emit(self, ExternalLinkClose))
        return -1;
    return 0;
}
@@ -553,7 +595,7 @@ static int Tokenizer_handle_free_link_text( | |||
Tokenizer* self, int* parens, Textbuffer* tail, Unicode this) | |||
{ | |||
#define PUSH_TAIL_BUFFER(tail, error) \ | |||
if (tail->length > 0) { \ | |||
if (tail && tail->length > 0) { \ | |||
if (Textbuffer_concat(self->topstack->textbuffer, tail)) \ | |||
return error; \ | |||
if (Textbuffer_reset(tail)) \ | |||
@@ -299,17 +299,34 @@ class Tokenizer(object): | |||
def _parse_wikilink(self):
    """Parse an internal wikilink at the head of the wikicode string.

    The text after ``[[`` is first tried as a bracket-enclosed external
    link. If that raises ``BadRoute``, it is parsed as a wikilink title; if
    that also raises ``BadRoute``, ``[[`` is emitted as plain text.
    """
    # Position of the second "[", used as the rewind point by the fallbacks.
    reset = self._head + 1
    self._head += 2
    try:
        # If the wikilink looks like an external link, parse it as such:
        link, extra, delta = self._really_parse_external_link(True)
    except BadRoute:
        self._head = reset + 1
        try:
            # Otherwise, actually parse it as a wikilink:
            wikilink = self._parse(contexts.WIKILINK_TITLE)
        except BadRoute:
            # Neither interpretation worked; treat the brackets as text.
            self._head = reset
            self._emit_text("[[")
        else:
            self._emit(tokens.WikilinkOpen())
            self._emit_all(wikilink)
            self._emit(tokens.WikilinkClose())
    else:
        if self._context & contexts.EXT_LINK_TITLE:
            # In this exceptional case, an external link that looks like a
            # wikilink inside of an external link is parsed as text:
            self._head = reset
            self._emit_text("[[")
            return
        # The outer pair of brackets is not part of the external link; the
        # leading one is emitted as literal text here.
        self._emit_text("[")
        self._emit(tokens.ExternalLinkOpen(brackets=True))
        self._emit_all(link)
        self._emit(tokens.ExternalLinkClose())
def _handle_wikilink_separator(self): | |||
"""Handle the separator between a wikilink's title and its text.""" | |||
@@ -82,6 +82,13 @@ output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com"), Exter | |||
--- | |||
name: brackets_recursive_2 | |||
label: bracket-enclosed link with a double bracket-enclosed link as the title | |||
input: "[http://example.com [[http://example.com]]]" | |||
output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="[[http://example.com"), ExternalLinkClose(), Text(text="]]")] | |||
--- | |||
name: period_after | |||
label: a period after a free link that is excluded | |||
input: "http://example.com." | |||
@@ -175,7 +175,7 @@ output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Tex | |||
--- | |||
name: external_link_inside_wikilink_title | |||
label: an external link inside a wikilink title, which is invalid | |||
label: an external link inside a wikilink title, which is not parsed | |||
input: "[[File:Example.png http://example.com]]" | |||
output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()] | |||
@@ -318,3 +318,17 @@ name: incomplete_comment_in_link_title_6 | |||
label: incomplete comments are invalid in link titles | |||
input: "[[foo<!--bar" | |||
output: [Text(text="[[foo<!--bar")] | |||
--- | |||
name: wikilink_to_external_link_fallback | |||
label: an external link enclosed in an extra pair of brackets (see issue #120) | |||
input: "[[http://example.com foo bar]]" | |||
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="foo bar"), ExternalLinkClose(), Text(text="]")] | |||
--- | |||
name: wikilink_to_external_link_fallback_2 | |||
label: an external link enclosed in an extra pair of brackets (see issue #120) | |||
input: "[[http://example.com]]" | |||
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")] |