
Disallow < and > in wikilink titles/template names (fixes #104)

tags/v0.4.1
Ben Kurtovic, 8 years ago
commit 1d5bbbe25b
4 changed files with 25 additions and 9 deletions:

  1. CHANGELOG                             (+2, -0)
  2. docs/changelog.rst                    (+2, -0)
  3. mwparserfromhell/parser/tokenizer.c   (+13, -6)
  4. mwparserfromhell/parser/tokenizer.py  (+8, -3)
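
The behavior change is easiest to see through the library's public API. Below is a minimal sketch of the expected result, based on the changelog entries in this commit; it assumes mwparserfromhell 0.4.1 or later (with this fix), and the expected outputs are illustrative rather than authoritative:

    import mwparserfromhell

    # A "<" or ">" now invalidates a wikilink title, so no wikilink node is built.
    print(mwparserfromhell.parse("[[foo<bar]]").filter_wikilinks())    # expected: []
    print(mwparserfromhell.parse("[[foo>bar]]").filter_wikilinks())    # expected: []

    # Comments remain allowed inside a title, so this still parses as a wikilink.
    print(mwparserfromhell.parse("[[foo<!-- note -->bar]]").filter_wikilinks())
    # expected: a single wikilink whose title contains the comment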

CHANGELOG  (+2, -0)

@@ -4,6 +4,8 @@ v0.4.1 (unreleased):
   distributed along with new releases. Windows users can now take advantage of
   C speedups without having a compiler of their own.
 - Added support for Python 3.5.
+- '<' and '>' are now disallowed in wikilink titles and template names. This
+  includes when denoting tags, but not comments.
 - Fixed some bugs in the release scripts.
 
 v0.4 (released May 23, 2015):


docs/changelog.rst  (+2, -0)

@@ -11,6 +11,8 @@ Unreleased
   distributed along with new releases. Windows users can now take advantage of
   C speedups without having a compiler of their own.
 - Added support for Python 3.5.
+- ``<`` and ``>`` are now disallowed in wikilink titles and template names.
+  This includes when denoting tags, but not comments.
 - Fixed some bugs in the release scripts.
 
 v0.4


mwparserfromhell/parser/tokenizer.c  (+13, -6)

@@ -1555,9 +1555,9 @@ static int Tokenizer_parse_comment(Tokenizer* self)
             Py_DECREF(comment);
             self->head += 2;
             if (self->topstack->context & LC_FAIL_NEXT) {
-                /* _verify_safe() sets this flag while parsing a template name
-                   when it encounters what might be a comment -- we must unset
-                   it to let _verify_safe() know it was correct: */
+                /* _verify_safe() sets this flag while parsing a template or
+                   link when it encounters what might be a comment -- we must
+                   unset it to let _verify_safe() know it was correct: */
                 self->topstack->context ^= LC_FAIL_NEXT;
             }
             return 0;
@@ -2868,10 +2868,16 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
     if (context & LC_FAIL_NEXT)
         return -1;
     if (context & LC_WIKILINK_TITLE) {
-        if (data == ']' || data == '{')
+        if (data == ']' || data == '{') {
             self->topstack->context |= LC_FAIL_NEXT;
-        else if (data == '\n' || data == '[' || data == '}')
+        } else if (data == '\n' || data == '[' || data == '}' || data == '>') {
             return -1;
+        } else if (data == '<') {
+            if (Tokenizer_READ(self, 1) == '!')
+                self->topstack->context |= LC_FAIL_NEXT;
+            else
+                return -1;
+        }
         return 0;
     }
     if (context & LC_EXT_LINK_TITLE)
@@ -2883,7 +2889,8 @@ Tokenizer_verify_safe(Tokenizer* self, uint64_t context, Py_UNICODE data)
             self->topstack->context |= LC_FAIL_NEXT;
             return 0;
         }
-        if (data == ']') {
+        if (data == ']' || data == '>' || (data == '<' &&
+                                           Tokenizer_READ(self, 1) != '!')) {
             return -1;
         }
         if (data == '|')
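
In Tokenizer_verify_safe, the wikilink-title branch now tolerates '<' only when it could start a comment ("<!"): LC_FAIL_NEXT is set and later cleared by the comment parser (see the Tokenizer_parse_comment hunk above), while any other '<', and '>' unconditionally, is a hard failure. As a standalone sketch of just that branch (a hypothetical helper, not library code; Python is used to match the pure-Python tokenizer that follows):

    # Hypothetical helper summarizing the LC_WIKILINK_TITLE branch above.
    def check_wikilink_title_char(this, nxt):
        """Classify one character read while inside a [[wikilink]] title."""
        if this in ("]", "{"):
            return "fail_next"   # may be the closing "]]" or a template start
        if this in ("\n", "[", "}", ">"):
            return "fail"        # ">" is now rejected outright
        if this == "<":
            # "<!" may begin a comment: flag FAIL_NEXT and let the comment
            # parser clear it on success; any other "<" (e.g. a tag) fails.
            return "fail_next" if nxt == "!" else "fail"
        return "ok"              # anything else is a safe title character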


mwparserfromhell/parser/tokenizer.py  (+8, -3)

@@ -610,7 +610,7 @@ class Tokenizer(object):
                 self._head += 2
                 if self._context & contexts.FAIL_NEXT:
                     # _verify_safe() sets this flag while parsing a template
-                    # name when it encounters what might be a comment -- we
+                    # or link when it encounters what might be a comment -- we
                     # must unset it to let _verify_safe() know it was correct:
                     self._context ^= contexts.FAIL_NEXT
                 return
@@ -1172,8 +1172,13 @@ class Tokenizer(object):
         if context & contexts.WIKILINK_TITLE:
             if this == "]" or this == "{":
                 self._context |= contexts.FAIL_NEXT
-            elif this == "\n" or this == "[" or this == "}":
+            elif this == "\n" or this == "[" or this == "}" or this == ">":
                 return False
+            elif this == "<":
+                if self._read(1) == "!":
+                    self._context |= contexts.FAIL_NEXT
+                else:
+                    return False
             return True
         elif context & contexts.EXT_LINK_TITLE:
             return this != "\n"
@@ -1181,7 +1186,7 @@ class Tokenizer(object):
             if this == "{" or this == "}" or this == "[":
                 self._context |= contexts.FAIL_NEXT
                 return True
-            if this == "]":
+            if this == "]" or this == ">" or (this == "<" and self._read(1) != "!"):
                 return False
             if this == "|":
                 return True
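
The second pair of _verify_safe hunks, which the changelog ties to template names, gets the same treatment. A companion sketch with the same caveats as the example near the top (illustrative expectations, mwparserfromhell 0.4.1 or later assumed):

    import mwparserfromhell

    # "<" and ">" now invalidate a template name, so no template node is built.
    print(mwparserfromhell.parse("{{foo<bar}}").filter_templates())    # expected: []
    print(mwparserfromhell.parse("{{foo>bar}}").filter_templates())    # expected: []

    # A comment is still fine, since "<!" is handed off to the comment parser.
    print(mwparserfromhell.parse("{{foo<!-- note -->bar}}").filter_templates())
    # expected: a single template node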

