Browse Source

Make Wikicode.matches() handle namespaces.

Fixes #302.
pull/335/head
Roy Smith 1 week ago
parent
commit
1223515fd7
2 changed files with 37 additions and 4 deletions
  1. +28
    -4
      src/mwparserfromhell/wikicode.py
  2. +9
    -0
      tests/test_wikicode.py

+ 28
- 4
src/mwparserfromhell/wikicode.py View File

@@ -516,19 +516,43 @@ class Wikicode(StringMixIn):
adjusted. Specifically, whitespace and markup is stripped and the first adjusted. Specifically, whitespace and markup is stripped and the first
letter's case is normalized. Typical usage is letter's case is normalized. Typical usage is
``if template.name.matches("stub"): ...``. ``if template.name.matches("stub"): ...``.

If either side has any colons, everything before the last colon is taken to be
a namespace and/or interwiki prefix. The parts before and after the colon are
normalized and compared separately; both must match for the result to be True.
""" """
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
this = normalize(self.strip_code().strip())
this = self.strip_code().strip()
this_prefix, this_postfix = self._split_and_normalize(this)


if isinstance(other, (str, bytes, Wikicode, Node)): if isinstance(other, (str, bytes, Wikicode, Node)):
that = parse_anything(other).strip_code().strip() that = parse_anything(other).strip_code().strip()
return this == normalize(that)
that_prefix, that_postfix = self._split_and_normalize(that)
return (this_prefix, this_postfix) == (that_prefix, that_postfix)


for obj in other: for obj in other:
that = parse_anything(obj).strip_code().strip() that = parse_anything(obj).strip_code().strip()
if this == normalize(that):
that_prefix, that_postfix = self._split_and_normalize(that)
if (this_prefix, this_postfix) == (that_prefix, that_postfix):
return True return True
return False return False
def _split_and_normalize(self, s):
"""Split a page title into a prefix (everything before the last colon)
and a postfix (everything after the last colon). Both parts are normalized
according to the rules specific to that part (the prefix is case-insensitive,
while the postfix is only case insensitive in the first character) before being
returned.

If there is no prefix, the returned prefix is an empty string.
"""
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
m = re.match(r'(.*):(.*)', s)
if m:
return normalize(m[1]).lower(), normalize(m[2])
else:
return "", normalize(s)




def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None): def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
"""Iterate over nodes in our list matching certain conditions. """Iterate over nodes in our list matching certain conditions.


+ 9
- 0
tests/test_wikicode.py View File

@@ -363,6 +363,8 @@ def test_matches():
code3 = parse("Hello world!") code3 = parse("Hello world!")
code4 = parse("World,_hello?") code4 = parse("World,_hello?")
code5 = parse("") code5 = parse("")
code6 = parse("File:Foo")
code7 = parse("Talk:foo")
assert code1.matches("Cleanup") is True assert code1.matches("Cleanup") is True
assert code1.matches("cleanup") is True assert code1.matches("cleanup") is True
assert code1.matches(" cleanup\n") is True assert code1.matches(" cleanup\n") is True
@@ -386,6 +388,13 @@ def test_matches():
assert code5.matches("") is True assert code5.matches("") is True
assert code5.matches("<!-- nothing -->") is True assert code5.matches("<!-- nothing -->") is True
assert code5.matches(("a", "b", "")) is True assert code5.matches(("a", "b", "")) is True
assert code6.matches("File:Foo") is True
assert code6.matches("File:foo") is True
assert code6.matches("FILE:FOO") is False
assert code6.matches("file:foo") is True
assert code6.matches("FiLe:foo") is True
assert code6.matches("FiLE:Foo") is True
assert code7.matches("Talk:Foo") is True




def test_filter_family(): def test_filter_family():


Loading…
Cancel
Save