Browse Source

Fix _handle_single_tag_end()'s token search order (fixes #74)

tags/v0.4
Ben Kurtovic 10 years ago
parent
commit
0497b54f03
3 changed files with 13 additions and 4 deletions
  1. +1
    -1
      mwparserfromhell/parser/tokenizer.c
  2. +5
    -3
      mwparserfromhell/parser/tokenizer.py
  3. +7
    -0
      tests/tokenizer/tags.mwtest

+ 1
- 1
mwparserfromhell/parser/tokenizer.c View File

@@ -1899,7 +1899,7 @@ static PyObject* Tokenizer_handle_single_tag_end(Tokenizer* self)
int is_instance;

len = PyList_GET_SIZE(self->topstack->stack);
-for (index = 0; index < len; index++) {
+for (index = len - 1; index >= 0; index--) {
token = PyList_GET_ITEM(self->topstack->stack, index);
is_instance = PyObject_IsInstance(token, TagCloseOpen);
if (is_instance == -1)


+ 5
- 3
mwparserfromhell/parser/tokenizer.py View File

@@ -21,6 +21,7 @@
# SOFTWARE.

from __future__ import unicode_literals
+from itertools import izip
from math import log
import re

@@ -751,11 +752,12 @@ class Tokenizer(object):

def _handle_single_tag_end(self):
"""Handle the stream end when inside a single-supporting HTML tag."""
-gen = enumerate(self._stack)
+stack = self._stack
+gen = izip(xrange(len(stack) - 1, -1, -1), reversed(stack))
index = next(i for i, t in gen if isinstance(t, tokens.TagCloseOpen))
-padding = self._stack[index].padding
+padding = stack[index].padding
token = tokens.TagCloseSelfclose(padding=padding, implicit=True)
-self._stack[index] = token
+stack[index] = token
return self._pop()

def _really_parse_tag(self):


+ 7
- 0
tests/tokenizer/tags.mwtest View File

@@ -124,6 +124,13 @@ output: [TagOpenOpen(), Text(text="ref"), TagAttrStart(pad_first=" ", pad_before

---

name: nested_tag_selfclosing
label: a tag nested within the attributes of another; outer tag implicitly self-closing
input: "<li <b></b></li>"
output: [TagOpenOpen(), Text(text="li"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), TagOpenOpen(), Text(text="b"), TagCloseOpen(padding=""), TagOpenClose(), Text(text="b"), TagCloseClose(), Text(text="</li"), TagCloseSelfclose(padding="", implicit=True)]

---

name: invalid_space_begin_open
label: invalid tag: a space at the beginning of the open tag
input: "< ref>test</ref>"


Loading…
Cancel
Save