@@ -1037,17 +1037,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) | |||||
Py_UNICODE after = Tokenizer_READ(self, 2); | Py_UNICODE after = Tokenizer_READ(self, 2); | ||||
int ctx = self->topstack->context; | int ctx = self->topstack->context; | ||||
return ((this == *"" || this == *"\n" || this == *"[" || this == *"]") || | |||||
(this == *"|" && ctx & LC_TEMPLATE) || | |||||
(this == *"=" && ctx & LC_TEMPLATE_PARAM_KEY) || | |||||
(this == *"}" && next == *"}" && ctx & LC_TEMPLATE) || | |||||
(this == *"}" && next == *"}" && after == *"}" | |||||
&& ctx & LC_ARGUMENT) || | |||||
(this == *"=" && ctx & LC_HEADING) || | |||||
(this == *"<" && next == *"/" && after != *"") || | |||||
(this == *"<" && next != *"!" && !(ctx & LC_TAG_CLOSE)) || | |||||
(this == *">" && ctx & LC_TAG_CLOSE) || | |||||
(this == *"'" && next == *"'")); | |||||
return (this == *"" || this == *"\n" || this == *"[" || this == *"]" || | |||||
this == *"<" || this == *">" || (this == *"'" && next == *"'") || | |||||
(this == *"|" && ctx & LC_TEMPLATE) || | |||||
(this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || | |||||
(this == *"}" && next == *"}" && | |||||
(ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT)))); | |||||
} | } | ||||
/* | /* | ||||
@@ -1073,7 +1068,19 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||||
while (1) { | while (1) { | ||||
this = Tokenizer_READ(self, 0); | this = Tokenizer_READ(self, 0); | ||||
next = Tokenizer_READ(self, 1); | next = Tokenizer_READ(self, 1); | ||||
if (!brackets && Tokenizer_is_free_link(self, this, next)) { | |||||
if (this == *"&") { | |||||
PUSH_TAIL_BUFFER(*extra, NULL) | |||||
if (Tokenizer_parse_entity(self)) | |||||
return NULL; | |||||
} | |||||
else if (this == *"<" && next == *"!" | |||||
&& Tokenizer_READ(self, 2) == *"-" | |||||
&& Tokenizer_READ(self, 3) == *"-") { | |||||
PUSH_TAIL_BUFFER(*extra, NULL) | |||||
if (Tokenizer_parse_comment(self)) | |||||
return NULL; | |||||
} | |||||
else if (!brackets && Tokenizer_is_free_link(self, this, next)) { | |||||
self->head--; | self->head--; | ||||
return Tokenizer_pop(self); | return Tokenizer_pop(self); | ||||
} | } | ||||
@@ -1086,18 +1093,6 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||||
} | } | ||||
else if (this == *"]") | else if (this == *"]") | ||||
return Tokenizer_pop(self); | return Tokenizer_pop(self); | ||||
else if (this == *"&") { | |||||
PUSH_TAIL_BUFFER(*extra, NULL) | |||||
if (Tokenizer_parse_entity(self)) | |||||
return NULL; | |||||
} | |||||
else if (this == *"<" && next == *"!" | |||||
&& Tokenizer_READ(self, 2) == *"-" | |||||
&& Tokenizer_READ(self, 3) == *"-") { | |||||
PUSH_TAIL_BUFFER(*extra, NULL) | |||||
if (Tokenizer_parse_comment(self)) | |||||
return NULL; | |||||
} | |||||
else if (this == *" ") { | else if (this == *" ") { | ||||
if (brackets) { | if (brackets) { | ||||
if (Tokenizer_emit(self, ExternalLinkSeparator)) | if (Tokenizer_emit(self, ExternalLinkSeparator)) | ||||
@@ -389,16 +389,13 @@ class Tokenizer(object): | |||||
"""Return whether the current head is the end of a free link.""" | """Return whether the current head is the end of a free link.""" | ||||
# Built from _parse()'s end sentinels: | # Built from _parse()'s end sentinels: | ||||
after, ctx = self._read(2), self._context | after, ctx = self._read(2), self._context | ||||
return (this is self.END or this in ("\n", "[", "]") or | |||||
this == "|" and ctx & contexts.TEMPLATE or | |||||
this == "=" and ctx & contexts.TEMPLATE_PARAM_KEY or | |||||
this == next == "}" and ctx & contexts.TEMPLATE or | |||||
this == next == after == "}" and ctx & contexts.ARGUMENT or | |||||
this == "=" and ctx & contexts.HEADING or | |||||
this == "<" and next == "/" and after is not self.END or | |||||
this == "<" and next != "!" and not ctx & contexts.TAG_CLOSE or | |||||
this == ">" and ctx & contexts.TAG_CLOSE or | |||||
this == next == "'") | |||||
equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING | |||||
return (this in (self.END, "\n", "[", "]", "<", ">") or | |||||
this == next == "'" or | |||||
(this == "|" and ctx & contexts.TEMPLATE) or | |||||
(this == "=" and ctx & equal_sign_contexts) or | |||||
(this == next == "}" and ctx & contexts.TEMPLATE) or | |||||
(this == next == after == "}" and ctx & contexts.ARGUMENT)) | |||||
def _really_parse_external_link(self, brackets): | def _really_parse_external_link(self, brackets): | ||||
"""Really parse an external link.""" | """Really parse an external link.""" | ||||
@@ -414,18 +411,7 @@ class Tokenizer(object): | |||||
tail = "" | tail = "" | ||||
while True: | while True: | ||||
this, next = self._read(), self._read(1) | this, next = self._read(), self._read(1) | ||||
if not brackets and self._is_free_link_end(this, next): | |||||
return self._pop(), tail, -1 | |||||
elif this is self.END or this == "\n": | |||||
self._fail_route() | |||||
elif this == next == "{" and self._can_recurse(): | |||||
if tail: | |||||
self._emit_text(tail) | |||||
tail = "" | |||||
self._parse_template_or_argument() | |||||
elif this == "]": | |||||
return self._pop(), tail, 0 | |||||
elif this == "&": | |||||
if this == "&": | |||||
if tail: | if tail: | ||||
self._emit_text(tail) | self._emit_text(tail) | ||||
tail = "" | tail = "" | ||||
@@ -436,6 +422,17 @@ class Tokenizer(object): | |||||
self._emit_text(tail) | self._emit_text(tail) | ||||
tail = "" | tail = "" | ||||
self._parse_comment() | self._parse_comment() | ||||
elif not brackets and self._is_free_link_end(this, next): | |||||
return self._pop(), tail, -1 | |||||
elif this is self.END or this == "\n": | |||||
self._fail_route() | |||||
elif this == next == "{" and self._can_recurse(): | |||||
if tail: | |||||
self._emit_text(tail) | |||||
tail = "" | |||||
self._parse_template_or_argument() | |||||
elif this == "]": | |||||
return self._pop(), tail, 0 | |||||
elif " " in this: | elif " " in this: | ||||
before, after = this.split(" ", 1) | before, after = this.split(" ", 1) | ||||
if brackets: | if brackets: | ||||