diff --git a/CHANGELOG b/CHANGELOG index d3e07cc..ff0b76e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,6 @@ v0.7 (unreleased): -- ... +- Fixed parsing of leading zeros in named HTML entities. (#288) v0.6.4 (released February 14, 2022): diff --git a/docs/changelog.rst b/docs/changelog.rst index 67f06f9..e63c785 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,7 +7,8 @@ v0.7 Unreleased (`changes `__): -- ... +- Fixed parsing of leading zeros in named HTML entities. + (`#288 <https://github.com/earwig/mwparserfromhell/issues/288>`_) v0.6.4 ------ diff --git a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c index 3ee62fd..0d3594b 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -1099,7 +1099,7 @@ Tokenizer_really_parse_entity(Tokenizer *self) } break; } - if (i == 0 && this == '0') { + if (i == 0 && numeric && this == '0') { zeroes++; self->head++; continue; diff --git a/tests/tokenizer/html_entities.mwtest b/tests/tokenizer/html_entities.mwtest index 53bedbd..942e522 100644 --- a/tests/tokenizer/html_entities.mwtest +++ b/tests/tokenizer/html_entities.mwtest @@ -156,3 +156,10 @@ name: invalid_partial_amp_pound_x_semicolon label: invalid entities: an ampersand, pound sign, and x input: "&#x" output: [Text(text="&#x")] + +--- + +name: invalid_zeros_before_named +label: invalid entities: zeros before a valid named entity +input: "&000nbsp;" +output: [Text(text="&000nbsp;")]