@@ -14,3 +14,4 @@ docs/_build
 scripts/*.log
 htmlcov/
 .idea/
+.pytest_cache/
@@ -1,6 +1,8 @@
 v0.7 (unreleased):
 
 - Added Linux AArch64 wheels. (#276)
+- Fixed C integer conversion, manifesting as parsing errors on big-endian
+  platforms. (#277)
 
 v0.6.2 (released May 16, 2021):
 
@@ -9,6 +9,9 @@ Unreleased
 - Added Linux AArch64 wheels.
   (`#276 <https://github.com/earwig/mwparserfromhell/issues/276>`_)
+- Fixed C integer conversion, manifesting as parsing errors on big-endian
+  platforms.
+  (`#277 <https://github.com/earwig/mwparserfromhell/issues/277>`_)
 
 v0.6.2
 ------
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
+Copyright (C) 2012-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -100,66 +100,6 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr)
 }
 
 /*
-    Check if the given character is a non-word character.
-
-    Equivalent to this Python code:
-
-    def is_non_word_character(ch):
-        if re.fullmatch(r"\W", chunk):
-            return True
-        return False
-*/
-static int is_non_word_character(Py_UCS4 ch)
-{
-    int ret = 0;
-    PyObject* modname = NULL;
-    PyObject* module = NULL;
-    PyObject* fmatch = NULL;
-    PyObject* pattern = NULL;
-    PyObject* str = NULL;
-    PyObject* posArgs = NULL;
-    PyObject* match = NULL;
-
-    modname = PyUnicode_FromString("re");
-    if (modname == NULL)
-        goto error;
-    module = PyImport_Import(modname);
-    if (module == NULL)
-        goto error;
-    fmatch = PyObject_GetAttrString(module, "fullmatch");
-    if (fmatch == NULL)
-        goto error;
-    pattern = PyUnicode_FromString("\\W");
-    if (pattern == NULL)
-        goto error;
-    str = PyUnicode_FROM_SINGLE(ch);
-    if (str == NULL)
-        goto error;
-    posArgs = PyTuple_Pack(2, pattern, str);
-    if (posArgs == NULL)
-        goto error;
-    match = PyObject_Call(fmatch, posArgs, NULL);
-    if (match == NULL)
-        goto error;
-
-    if (match != Py_None)
-        ret = 1;
-    goto end;
-
-    error:
-    ret = -1;
-    end:
-    Py_XDECREF(match);
-    Py_XDECREF(posArgs);
-    Py_XDECREF(str);
-    Py_XDECREF(pattern);
-    Py_XDECREF(fmatch);
-    Py_XDECREF(module);
-    Py_XDECREF(modname);
-    return ret;
-}
-
-/*
     Parse a template at the head of the wikicode string.
 */
 static int Tokenizer_parse_template(Tokenizer* self, int has_content)
@@ -576,7 +516,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
     static const char* valid = URISCHEME;
     Textbuffer *scheme_buffer = Textbuffer_new(&self->text);
     PyObject *scheme;
-    Py_UCS4 chunk;
+    Py_UCS4 ch;
     Py_ssize_t i;
     int slashes, j;
     uint64_t new_context;
@@ -586,15 +526,10 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
     // We have to backtrack through the textbuffer looking for our scheme since
     // it was just parsed as text:
     for (i = self->topstack->textbuffer->length - 1; i >= 0; i--) {
-        chunk = Textbuffer_read(self->topstack->textbuffer, i);
-        // stop at the first non-word character
-        int is_non_word = is_non_word_character(chunk);
-        if (is_non_word < 0) {
-            Textbuffer_dealloc(scheme_buffer);
-            return -1;
-        }
-        else if (is_non_word == 1)
-            goto end_of_loop;
+        ch = Textbuffer_read(self->topstack->textbuffer, i);
+        // Stop at the first non-word character (equivalent to \W in regex)
+        if (!Py_UNICODE_ISALNUM(ch) && ch != '_')
+            break;
         j = 0;
         do {
             if (!valid[j]) {
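The inline check above replaces a round-trip into Python's `re` module with two C-level tests. A rough Python sketch of the equivalence being relied on (the function names here are illustrative, not from the codebase; CPython defines `\w` in terms of `str.isalnum()` plus the underscore, so the two tests agree):

```python
import re

def is_non_word_regex(ch):
    # What the removed helper computed: \W matches any character that is
    # not a word character (letters, digits, underscore).
    return re.fullmatch(r"\W", ch) is not None

def is_non_word_inline(ch):
    # What the new C code computes: Py_UNICODE_ISALNUM corresponds roughly
    # to str.isalnum(), so "non-word" means neither alphanumeric nor "_".
    return not (ch.isalnum() or ch == "_")

assert is_non_word_regex("-") and is_non_word_inline("-")
assert not is_non_word_regex("a") and not is_non_word_inline("a")
```

Besides avoiding an interpreter call per character, the inline test cannot fail, which is why the `is_non_word < 0` error path and the `end_of_loop` label disappear.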
@@ -602,10 +537,9 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
                 FAIL_ROUTE(0);
                 return 0;
             }
-        } while (chunk != (Py_UCS4) valid[j++]);
-        Textbuffer_write(scheme_buffer, chunk);
+        } while (ch != (Py_UCS4) valid[j++]);
+        Textbuffer_write(scheme_buffer, ch);
     }
-    end_of_loop:
     Textbuffer_reverse(scheme_buffer);
     scheme = Textbuffer_render(scheme_buffer);
     if (!scheme) {
@@ -670,17 +604,17 @@ static int Tokenizer_handle_free_link_text(
 }
 
 /*
-    Return whether the current head is the end of a free link.
+    Return whether the current head is the end of a URI.
 */
 static int
-Tokenizer_is_free_link_end(Tokenizer* self, Py_UCS4 this, Py_UCS4 next)
+Tokenizer_is_uri_end(Tokenizer* self, Py_UCS4 this, Py_UCS4 next)
 {
     // Built from Tokenizer_parse()'s end sentinels:
     Py_UCS4 after = Tokenizer_read(self, 2);
     uint64_t ctx = self->topstack->context;
 
     return (!this || this == '\n' || this == '[' || this == ']' ||
-            this == '<' || this == '>' || this == '"' ||
+            this == '<' || this == '>' || this == '"' || this == ' ' ||
             (this == '\'' && next == '\'') ||
             (this == '|' && ctx & LC_TEMPLATE) ||
             (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) ||
@@ -723,50 +657,48 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
             if (Tokenizer_parse_comment(self))
                 return NULL;
         }
-        else if (!brackets && Tokenizer_is_free_link_end(self, this, next)) {
-            self->head--;
-            return Tokenizer_pop(self);
-        }
-        else if (!this || this == '\n')
-            return Tokenizer_fail_route(self);
         else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) {
             PUSH_TAIL_BUFFER(extra, NULL)
             if (Tokenizer_parse_template_or_argument(self))
                 return NULL;
         }
-        else if (this == ']')
-            return Tokenizer_pop(self);
-        else if (this == ' ' || Tokenizer_is_free_link_end(self, this, next)) {
-            if (brackets) {
+        else if (brackets) {
+            if (!this || this == '\n')
+                return Tokenizer_fail_route(self);
+            if (this == ']')
+                return Tokenizer_pop(self);
+            if (Tokenizer_is_uri_end(self, this, next)) {
                 if (this == ' ') {
                     if (Tokenizer_emit(self, ExternalLinkSeparator))
                         return NULL;
-                    self->head++;
                 }
                 else {
                     PyObject* kwargs = PyDict_New();
                     if (!kwargs)
                         return NULL;
-                    if (this != ' ')
-                        PyDict_SetItemString(kwargs, "suppress_space", Py_True);
+                    PyDict_SetItemString(kwargs, "suppress_space", Py_True);
                     if (Tokenizer_emit_kwargs(self, ExternalLinkSeparator, kwargs))
                         return NULL;
                 }
                 self->topstack->context ^= LC_EXT_LINK_URI;
                 self->topstack->context |= LC_EXT_LINK_TITLE;
+                if (this == ' ')
+                    self->head++;
                 return Tokenizer_parse(self, 0, 0);
             }
-            if (Textbuffer_write(extra, this))
-                return NULL;
-            return Tokenizer_pop(self);
-        }
-        else if (!brackets) {
-            if (Tokenizer_handle_free_link_text(self, &parens, extra, this))
+            if (Tokenizer_emit_char(self, this))
                 return NULL;
         }
         else {
-            if (Tokenizer_emit_char(self, this))
+            if (Tokenizer_is_uri_end(self, this, next)) {
+                if (this == ' ') {
+                    if (Textbuffer_write(extra, this))
+                        return NULL;
+                }
+                else
+                    self->head--;
+                return Tokenizer_pop(self);
+            }
+            if (Tokenizer_handle_free_link_text(self, &parens, extra, this))
                 return NULL;
         }
         self->head++;
@@ -129,10 +129,10 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input)
 static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 {
     PyObject *input, *tokens;
-    uint64_t context = 0;
+    unsigned long long context = 0;
     int skip_style_tags = 0;
 
-    if (PyArg_ParseTuple(args, "U|ii", &input, &context, &skip_style_tags)) {
+    if (PyArg_ParseTuple(args, "U|Kp", &input, &context, &skip_style_tags)) {
         Py_INCREF(input);
         if (load_tokenizer_text(&self->text, input))
             return NULL;
@@ -143,7 +143,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 
         /* Failed to parse a Unicode object; try a string instead. */
         PyErr_Clear();
-        if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context,
+        if (!PyArg_ParseTuple(args, "s#|Kp", &encoded, &size, &context,
                               &skip_style_tags))
            return NULL;
        if (!(input = PyUnicode_FromStringAndSize(encoded, size)))
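Both calls now use the "K" (unsigned long long) and "p" (bool) format codes instead of "ii". The old "i" format wrote a 4-byte int through a pointer to the 64-bit context, which is harmless on little-endian machines but lands in the high bytes on big-endian ones; that is the parsing-error bug reported as #277. An illustrative ctypes reproduction of the mismatch (not code from the library):

```python
import ctypes

# Emulate PyArg_ParseTuple's "i" format writing a 32-bit int through a
# pointer that actually points at a uint64_t:
ctx = ctypes.c_uint64(0)
ctypes.cast(ctypes.pointer(ctx), ctypes.POINTER(ctypes.c_int32))[0] = 1

# Little-endian: the low 4 bytes are written, so ctx.value == 0x1.
# Big-endian: the HIGH 4 bytes are written, so ctx.value == 0x100000000
# and every context-flag test in the tokenizer sees garbage.
print(hex(ctx.value))
```

Declaring `context` as `unsigned long long` and parsing it with "K" makes the parsed width match the storage width on every platform.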
@@ -1,4 +1,4 @@ | |||||
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -60,8 +60,9 @@ class Tokenizer:
     USES_C = False
     START = object()
     END = object()
-    MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";",
+    MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", '"', "#", "*", ";",
                ":", "/", "-", "!", "\n", START, END]
+    URISCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"
     MAX_DEPTH = 40
     regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)
     tag_splitter = re.compile(r"([\s\"\'\\]+)")
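`URISCHEME` is the scheme alphabet from RFC 3986 (letters, digits, "+", "-", "."), previously duplicated as a local `valid` string in two methods; the `'"'` added to `MARKERS` alongside it makes the tokenizer split text runs at double quotes, so the URI-end checks below can see them. A small sketch of the kind of check the shared constant supports (`looks_like_scheme` is a hypothetical name, not part of the class):

```python
URISCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"

def looks_like_scheme(text):
    # RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    return bool(text) and text[0].isalpha() and all(c in URISCHEME for c in text)

assert looks_like_scheme("https")
assert looks_like_scheme("svn+ssh")
assert not looks_like_scheme("ht tp")  # space is not in the alphabet
```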
@@ -323,7 +324,7 @@ class Tokenizer:
         self._head += 2
         try:
             # If the wikilink looks like an external link, parse it as such:
-            link, _extra, _delta = self._really_parse_external_link(True)
+            link, _extra = self._really_parse_external_link(True)
         except BadRoute:
             self._head = reset + 1
             try:
@@ -366,8 +367,7 @@ class Tokenizer:
             self._emit_text("//")
             self._head += 2
         else:
-            valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"
-            all_valid = lambda: all(char in valid for char in self._read())
+            all_valid = lambda: all(char in self.URISCHEME for char in self._read())
             scheme = ""
             while self._read() is not self.END and all_valid():
                 scheme += self._read()
@@ -386,17 +386,16 @@ class Tokenizer:
 
     def _parse_free_uri_scheme(self):
         """Parse the URI scheme of a free (no brackets) external link."""
-        valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"
         scheme = []
         try:
             # We have to backtrack through the textbuffer looking for our
             # scheme since it was just parsed as text:
             for chunk in reversed(self._textbuffer):
                 for char in reversed(chunk):
-                    # stop at the first non-word character
+                    # Stop at the first non-word character
                     if re.fullmatch(r"\W", char):
                         raise StopIteration()
-                    if char not in valid:
+                    if char not in self.URISCHEME:
                         raise BadRoute()
                     scheme.append(char)
         except StopIteration:
@@ -434,15 +433,15 @@ class Tokenizer:
             self._emit_text(this)
         return punct, tail
 
-    def _is_free_link_end(self, this, nxt):
-        """Return whether the current head is the end of a free link."""
+    def _is_uri_end(self, this, nxt):
+        """Return whether the current head is the end of a URI."""
         # Built from _parse()'s end sentinels:
         after, ctx = self._read(2), self._context
-        equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING
-        return (this in (self.END, "\n", "[", "]", "<", ">", "\"") or
+        return (this in (self.END, "\n", "[", "]", "<", ">", '"') or
+                " " in this or
                 this == nxt == "'" or
                 (this == "|" and ctx & contexts.TEMPLATE) or
-                (this == "=" and ctx & equal_sign_contexts) or
+                (this == "=" and ctx & (contexts.TEMPLATE_PARAM_KEY | contexts.HEADING)) or
                 (this == nxt == "}" and ctx & contexts.TEMPLATE) or
                 (this == nxt == after == "}" and ctx & contexts.ARGUMENT))
 
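With `" " in this` added (and `"` newly in `MARKERS`), both spaces and double quotes now terminate the URI. A quick behavioral check, assuming a build of mwparserfromhell that includes this patch:

```python
import mwparserfromhell

# A double quote should end a free link's URI instead of being swallowed:
code = mwparserfromhell.parse('See https://example.com"quoted" for details')
print(code.filter_external_links())  # expected: ['https://example.com']
```

Note the check is `" " in this` rather than `this == " "`: `this` is a whole text chunk between markers, and since space is not a marker, it can sit anywhere inside the chunk.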
@@ -451,6 +450,7 @@ class Tokenizer:
         if brackets:
             self._parse_bracketed_uri_scheme()
             invalid = ("\n", " ", "]")
+            punct = ()
         else:
             self._parse_free_uri_scheme()
             invalid = ("\n", " ", "[", "]")
@@ -465,53 +465,47 @@ class Tokenizer:
                     self._emit_text(tail)
                     tail = ""
                 self._parse_entity()
-            elif (this == "<" and nxt == "!" and self._read(2) ==
-                    self._read(3) == "-"):
+            elif this == "<" and nxt == "!" and self._read(2) == self._read(3) == "-":
                 if tail:
                     self._emit_text(tail)
                     tail = ""
                 self._parse_comment()
-            elif not brackets and self._is_free_link_end(this, nxt):
-                return self._pop(), tail, -1
-            elif this is self.END or this == "\n":
-                self._fail_route()
             elif this == nxt == "{" and self._can_recurse():
                 if tail:
                     self._emit_text(tail)
                     tail = ""
                 self._parse_template_or_argument()
-            elif this == "]":
-                return self._pop(), tail, 0
-            elif this == "'" and nxt == "'":
-                separator = tokens.ExternalLinkSeparator()
-                separator.suppress_space = True
-                self._emit(separator)
-                self._context ^= contexts.EXT_LINK_URI
-                self._context |= contexts.EXT_LINK_TITLE
-                return self._parse(push=False), None, 0
-            elif any(ch in this for ch in (" ", "\n", "[", "]", "<", ">",
-                                           "\"")):
-                before, after = re.split(r"[ \n[\]<>\"]", this, maxsplit=1)
-                delimiter = this[len(before)]
-                if brackets:
-                    self._emit_text(before)
-                    separator = tokens.ExternalLinkSeparator()
-                    if delimiter != " ":
+            elif brackets:
+                if this is self.END or this == "\n":
+                    self._fail_route()
+                if this == "]":
+                    return self._pop(), None
+                if self._is_uri_end(this, nxt):
+                    if " " in this:
+                        before, after = this.split(" ", 1)
+                        self._emit_text(before)
+                        self._emit(tokens.ExternalLinkSeparator())
+                        if after:
+                            self._emit_text(after)
+                        self._head += 1
+                    else:
+                        separator = tokens.ExternalLinkSeparator()
                         separator.suppress_space = True
-                    self._emit(separator)
-                    if after:
-                        self._emit_text(after)
+                        self._emit(separator)
                     self._context ^= contexts.EXT_LINK_URI
                     self._context |= contexts.EXT_LINK_TITLE
-                    if delimiter == " ":
-                        self._head += 1
-                    return self._parse(push=False), None, 0
-                punct, tail = self._handle_free_link_text(punct, tail, before)
-                return self._pop(), tail + " " + after, 0
-            elif not brackets:
-                punct, tail = self._handle_free_link_text(punct, tail, this)
-            else:
+                    return self._parse(push=False), None
                 self._emit_text(this)
+            else:
+                if self._is_uri_end(this, nxt):
+                    if this is not self.END and " " in this:
+                        before, after = this.split(" ", 1)
+                        punct, tail = self._handle_free_link_text(punct, tail, before)
+                        tail += " " + after
+                    else:
+                        self._head -= 1
+                    return self._pop(), tail
+                punct, tail = self._handle_free_link_text(punct, tail, this)
             self._head += 1
 
     def _remove_uri_scheme_from_textbuffer(self, scheme):
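For bracketed links, the chunk is now split at its first space: everything before it ends the URI, an `ExternalLinkSeparator` is emitted, and the remainder begins the title. For free links, reaching a URI end instead closes the link and returns the leftover text as `tail`. A usage sketch, under the same assumption as above (a patched build):

```python
import mwparserfromhell

# Bracketed: the first space inside the brackets separates URI from title.
link = mwparserfromhell.parse("[https://example.com Example site]").filter_external_links()[0]
print(link.url)    # https://example.com
print(link.title)  # Example site

# Free: the link ends at the space; the rest stays ordinary text.
free = mwparserfromhell.parse("read https://example.com today")
print(free.filter_external_links())  # expected: ['https://example.com']
```

Dropping the third `delta` return value is possible because head adjustment now happens inside the method (`self._head += 1` and `self._head -= 1`) instead of being handed back to the caller.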
@@ -536,7 +530,7 @@ class Tokenizer:
         reset = self._head
         self._head += 1
         try:
-            link, extra, delta = self._really_parse_external_link(brackets)
+            link, extra = self._really_parse_external_link(brackets)
         except BadRoute:
             self._head = reset
             if not brackets and self._context & contexts.DL_TERM:
@@ -550,7 +544,6 @@ class Tokenizer:
         self._emit(tokens.ExternalLinkOpen(brackets=brackets))
         self._emit_all(link)
         self._emit(tokens.ExternalLinkClose())
-        self._head += delta
         if extra:
             self._emit_text(extra)
 
@@ -854,8 +847,8 @@ class Tokenizer:
                 depth -= 1
                 if depth == 0:  # pragma: no cover (untestable/exceptional)
                     raise ParserError(
-                        "_handle_single_tag_end() got an unexpected "
-                        "TagCloseSelfclose")
+                        "_handle_single_tag_end() got an unexpected TagCloseSelfclose"
+                    )
         else:  # pragma: no cover (untestable/exceptional case)
             raise ParserError("_handle_single_tag_end() missed a TagCloseOpen")
         padding = stack[index].padding