Browse Source

Fix a rare parsing bug involving nested broken tags.

tags/v0.5
Ben Kurtovic 6 years ago
parent
commit
cd4f90e663
5 changed files with 25 additions and 0 deletions
  1. CHANGELOG (+2, -0)
  2. docs/changelog.rst (+2, -0)
  3. mwparserfromhell/parser/ctokenizer/tok_parse.c (+8, -0)
  4. mwparserfromhell/parser/tokenizer.py (+6, -0)
  5. tests/tokenizer/tags.mwtest (+7, -0)

+ 2
- 0
CHANGELOG View File

@@ -16,6 +16,8 @@ v0.5 (unreleased):
on incompletely-constructed StringMixIn subclasses).
- Fixed Wikicode.matches()'s behavior on iterables besides lists and tuples.
- Fixed len() sometimes raising ValueError on empty node lists.
- Fixed a rare parsing bug involving self-closing tags inside the attributes of
  unpaired tags.
- Fixed release script after changes to PyPI.

v0.4.4 (released December 30, 2016):


+ 2
- 0
docs/changelog.rst View File

@@ -27,6 +27,8 @@ Unreleased
- Fixed :meth:`.Wikicode.matches`\ 's behavior on iterables besides lists and
  tuples.
- Fixed ``len()`` sometimes raising ``ValueError`` on empty node lists.
- Fixed a rare parsing bug involving self-closing tags inside the attributes of
  unpaired tags.
- Fixed release script after changes to PyPI.

v0.4.4


+ 8
- 0
mwparserfromhell/parser/ctokenizer/tok_parse.c View File

@@ -1548,6 +1548,14 @@ static PyObject* Tokenizer_handle_single_tag_end(Tokenizer* self)
if (depth == 0)
break;
}
is_instance = PyObject_IsInstance(token, TagCloseSelfclose);
if (is_instance == -1)
return NULL;
else if (is_instance == 1) {
depth--;
if (depth == 0) // Should never happen
return NULL;
}
}
if (!token || depth > 0)
return NULL;


+ 6
- 0
mwparserfromhell/parser/tokenizer.py View File

@@ -819,6 +819,12 @@ class Tokenizer(object):
depth -= 1
if depth == 0:
break
elif isinstance(token, tokens.TagCloseSelfclose):
depth -= 1
if depth == 0: # pragma: no cover (untestable/exceptional)
raise ParserError(
"_handle_single_tag_end() got an unexpected "
"TagCloseSelfclose")
else: # pragma: no cover (untestable/exceptional case)
raise ParserError("_handle_single_tag_end() missed a TagCloseOpen")
padding = stack[index].padding


+ 7
- 0
tests/tokenizer/tags.mwtest View File

@@ -646,3 +646,10 @@ name: non_ascii_full
label: an open/close tag pair containing non-ASCII characters
input: "<éxamplé></éxamplé>"
output: [TagOpenOpen(), Text(text="éxamplé"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="éxamplé"), TagCloseClose()]

---

name: single_nested_selfclosing
label: a single (unpaired) tag with a self-closing tag in the middle (see issue #147)
input: "<li a <br/> c>foobar"
output: [TagOpenOpen(), Text(text="li"), TagAttrStart(pad_first=" ", pad_after_eq="", pad_before_eq=" "), Text(text="a"), TagAttrStart(pad_first="", pad_after_eq="", pad_before_eq=" "), TagOpenOpen(), Text(text="br"), TagCloseSelfclose(padding=""), TagAttrStart(pad_first="", pad_after_eq="", pad_before_eq=""), Text(text="c"), TagCloseSelfclose(padding="", implicit=True), Text(text="foobar")]

Loading…
Cancel
Save