@@ -1578,16 +1578,21 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self) | |||||
while (1) { | while (1) { | ||||
this = Tokenizer_READ(self, 0); | this = Tokenizer_READ(self, 0); | ||||
next = Tokenizer_READ(self, 1); | next = Tokenizer_READ(self, 1); | ||||
self->head++; | |||||
if (this == *"") | if (this == *"") | ||||
return Tokenizer_fail_route(self); | return Tokenizer_fail_route(self); | ||||
else if (this == *"<" && next == *"/") { | else if (this == *"<" && next == *"/") { | ||||
if (Tokenizer_handle_tag_open_close(self)) | if (Tokenizer_handle_tag_open_close(self)) | ||||
return NULL; | return NULL; | ||||
self->head++; | |||||
return Tokenizer_parse(self, 0, 0); | return Tokenizer_parse(self, 0, 0); | ||||
} | } | ||||
if (Tokenizer_emit_char(self, this)) | |||||
else if (this == *"&") { | |||||
if (Tokenizer_parse_entity(self)) | |||||
return NULL; | |||||
} | |||||
else if (Tokenizer_emit_char(self, this)) | |||||
return NULL; | return NULL; | ||||
self->head++; | |||||
} | } | ||||
} | } | ||||
@@ -552,14 +552,17 @@ class Tokenizer(object): | |||||
"""Handle the body of an HTML tag that is parser-blacklisted.""" | """Handle the body of an HTML tag that is parser-blacklisted.""" | ||||
while True: | while True: | ||||
this, next = self._read(), self._read(1) | this, next = self._read(), self._read(1) | ||||
self._head += 1 | |||||
if this is self.END: | if this is self.END: | ||||
self._fail_route() | self._fail_route() | ||||
elif this == "<" and next == "/": | elif this == "<" and next == "/": | ||||
self._handle_tag_open_close() | self._handle_tag_open_close() | ||||
self._head += 1 | |||||
return self._parse(push=False) | return self._parse(push=False) | ||||
elif this == "&": | |||||
self._parse_entity() | |||||
else: | else: | ||||
self._emit_text(this) | self._emit_text(this) | ||||
self._head += 1 | |||||
def _handle_single_only_tag_end(self): | def _handle_single_only_tag_end(self): | ||||
"""Handle the end of an implicitly closing single-only HTML tag.""" | """Handle the end of an implicitly closing single-only HTML tag.""" | ||||
@@ -117,6 +117,20 @@ output: [Text(text="&;")] | |||||
--- | --- | ||||
name: invalid_partial_amp_pound | |||||
label: invalid entities: just an ampersand, pound sign | |||||
input: "&#" | |||||
output: [Text(text="&#")] | |||||
--- | |||||
name: invalid_partial_amp_pound_x | |||||
label: invalid entities: just an ampersand, pound sign, x | |||||
input: "&#x" | |||||
output: [Text(text="&#x")] | |||||
--- | |||||
name: invalid_partial_amp_pound_semicolon | name: invalid_partial_amp_pound_semicolon | ||||
label: invalid entities: an ampersand, pound sign, and semicolon | label: invalid entities: an ampersand, pound sign, and semicolon | ||||
input: "&#;" | input: "&#;" | ||||
@@ -467,6 +467,27 @@ output: [TemplateOpen(), Text(text="t1"), TemplateClose(), Text(text="<nowiki>") | |||||
--- | --- | ||||
name: unparsable_entity | |||||
label: a HTML entity inside unparsable text is still parsed | |||||
input: "{{t1}}<nowiki>{{t2}}&nbsp;{{t3}}</nowiki>{{t4}}" | |||||
output: [TemplateOpen(), Text(text="t1"), TemplateClose(), TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="{{t2}}"), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), Text(text="{{t3}}"), TagOpenClose(), Text(text="nowiki"), TagCloseClose(), TemplateOpen(), Text(text="t4"), TemplateClose()] | |||||
--- | |||||
name: unparsable_entity_incomplete | |||||
label: an incomplete HTML entity inside unparsable text | |||||
input: "<nowiki>&</nowiki>" | |||||
output: [TagOpenOpen(), Text(text="nowiki"), TagCloseOpen(padding=""), Text(text="&"), TagOpenClose(), Text(text="nowiki"), TagCloseClose()] | |||||
--- | |||||
name: unparsable_entity_incomplete_2 | |||||
label: an incomplete HTML entity inside unparsable text | |||||
input: "<nowiki>&" | |||||
output: [Text(text="<nowiki>&")] | |||||
--- | |||||
name: single_open_close | name: single_open_close | ||||
label: a tag that supports being single; both an open and a close tag | label: a tag that supports being single; both an open and a close tag | ||||
input: "foo<li>bar{{baz}}</li>" | input: "foo<li>bar{{baz}}</li>" | ||||