@@ -1037,17 +1037,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next) | |||
Py_UNICODE after = Tokenizer_READ(self, 2); | |||
int ctx = self->topstack->context; | |||
return ((this == *"" || this == *"\n" || this == *"[" || this == *"]") || | |||
(this == *"|" && ctx & LC_TEMPLATE) || | |||
(this == *"=" && ctx & LC_TEMPLATE_PARAM_KEY) || | |||
(this == *"}" && next == *"}" && ctx & LC_TEMPLATE) || | |||
(this == *"}" && next == *"}" && after == *"}" | |||
&& ctx & LC_ARGUMENT) || | |||
(this == *"=" && ctx & LC_HEADING) || | |||
(this == *"<" && next == *"/" && after != *"") || | |||
(this == *"<" && next != *"!" && !(ctx & LC_TAG_CLOSE)) || | |||
(this == *">" && ctx & LC_TAG_CLOSE) || | |||
(this == *"'" && next == *"'")); | |||
return (this == *"" || this == *"\n" || this == *"[" || this == *"]" || | |||
this == *"<" || this == *">" || (this == *"'" && next == *"'") || | |||
(this == *"|" && ctx & LC_TEMPLATE) || | |||
(this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || | |||
(this == *"}" && next == *"}" && | |||
(ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT)))); | |||
} | |||
/* | |||
@@ -1073,7 +1068,19 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||
while (1) { | |||
this = Tokenizer_READ(self, 0); | |||
next = Tokenizer_READ(self, 1); | |||
if (!brackets && Tokenizer_is_free_link(self, this, next)) { | |||
if (this == *"&") { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_entity(self)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && next == *"!" | |||
&& Tokenizer_READ(self, 2) == *"-" | |||
&& Tokenizer_READ(self, 3) == *"-") { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_comment(self)) | |||
return NULL; | |||
} | |||
else if (!brackets && Tokenizer_is_free_link(self, this, next)) { | |||
self->head--; | |||
return Tokenizer_pop(self); | |||
} | |||
@@ -1086,18 +1093,6 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, | |||
} | |||
else if (this == *"]") | |||
return Tokenizer_pop(self); | |||
else if (this == *"&") { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_entity(self)) | |||
return NULL; | |||
} | |||
else if (this == *"<" && next == *"!" | |||
&& Tokenizer_READ(self, 2) == *"-" | |||
&& Tokenizer_READ(self, 3) == *"-") { | |||
PUSH_TAIL_BUFFER(*extra, NULL) | |||
if (Tokenizer_parse_comment(self)) | |||
return NULL; | |||
} | |||
else if (this == *" ") { | |||
if (brackets) { | |||
if (Tokenizer_emit(self, ExternalLinkSeparator)) | |||
@@ -389,16 +389,13 @@ class Tokenizer(object): | |||
"""Return whether the current head is the end of a free link.""" | |||
# Built from _parse()'s end sentinels: | |||
after, ctx = self._read(2), self._context | |||
return (this is self.END or this in ("\n", "[", "]") or | |||
this == "|" and ctx & contexts.TEMPLATE or | |||
this == "=" and ctx & contexts.TEMPLATE_PARAM_KEY or | |||
this == next == "}" and ctx & contexts.TEMPLATE or | |||
this == next == after == "}" and ctx & contexts.ARGUMENT or | |||
this == "=" and ctx & contexts.HEADING or | |||
this == "<" and next == "/" and after is not self.END or | |||
this == "<" and next != "!" and not ctx & contexts.TAG_CLOSE or | |||
this == ">" and ctx & contexts.TAG_CLOSE or | |||
this == next == "'") | |||
equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING | |||
return (this in (self.END, "\n", "[", "]", "<", ">") or | |||
this == next == "'" or | |||
(this == "|" and ctx & contexts.TEMPLATE) or | |||
(this == "=" and ctx & equal_sign_contexts) or | |||
(this == next == "}" and ctx & contexts.TEMPLATE) or | |||
(this == next == after == "}" and ctx & contexts.ARGUMENT)) | |||
def _really_parse_external_link(self, brackets): | |||
"""Really parse an external link.""" | |||
@@ -414,18 +411,7 @@ class Tokenizer(object): | |||
tail = "" | |||
while True: | |||
this, next = self._read(), self._read(1) | |||
if not brackets and self._is_free_link_end(this, next): | |||
return self._pop(), tail, -1 | |||
elif this is self.END or this == "\n": | |||
self._fail_route() | |||
elif this == next == "{" and self._can_recurse(): | |||
if tail: | |||
self._emit_text(tail) | |||
tail = "" | |||
self._parse_template_or_argument() | |||
elif this == "]": | |||
return self._pop(), tail, 0 | |||
elif this == "&": | |||
if this == "&": | |||
if tail: | |||
self._emit_text(tail) | |||
tail = "" | |||
@@ -436,6 +422,17 @@ class Tokenizer(object): | |||
self._emit_text(tail) | |||
tail = "" | |||
self._parse_comment() | |||
elif not brackets and self._is_free_link_end(this, next): | |||
return self._pop(), tail, -1 | |||
elif this is self.END or this == "\n": | |||
self._fail_route() | |||
elif this == next == "{" and self._can_recurse(): | |||
if tail: | |||
self._emit_text(tail) | |||
tail = "" | |||
self._parse_template_or_argument() | |||
elif this == "]": | |||
return self._pop(), tail, 0 | |||
elif " " in this: | |||
before, after = this.split(" ", 1) | |||
if brackets: | |||