Merge aa4b45c1bb into e6fa7b2b2d

12 years ago · 9667e2de5a
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@
 *.egg
 *.egg-info
 .DS_Store
 __pycache__
 build
 docs/_build
--- a/mwparserfromhell/nodes/extras/attribute.py
+++ b/mwparserfromhell/nodes/extras/attribute.py
@@ -20,7 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 from ...string_mixin import StringMixIn
 from ...compat import str, bytes

 __all__ = ["Attribute"]

@@ -34,9 +37,9 @@ class Attribute(StringMixIn):
    def __unicode__(self):
        if self.value:
            if self.quoted:
                return unicode(self.name) + '="' + unicode(self.value) + '"'
            return unicode(self.name) + "=" + unicode(self.value)
        return unicode(self.name)
                return str(self.name) + '="' + str(self.value) + '"'
            return str(self.name) + "=" + str(self.value)
        return str(self.name)

    @property
    def name(self):
--- a/mwparserfromhell/nodes/extras/parameter.py
+++ b/mwparserfromhell/nodes/extras/parameter.py
@@ -20,8 +20,11 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 from ...string_mixin import StringMixIn
 from ...utils import parse_anything
 from ...compat import str, bytes

 __all__ = ["Parameter"]

@@ -34,8 +37,8 @@ class Parameter(StringMixIn):

    def __unicode__(self):
        if self.showkey:
            return unicode(self.name) + "=" + unicode(self.value)
        return unicode(self.value)
            return str(self.name) + "=" + str(self.value)
        return str(self.value)

    @property
    def name(self):
--- a/mwparserfromhell/nodes/html_entity.py
+++ b/mwparserfromhell/nodes/html_entity.py
@@ -20,9 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 import htmlentitydefs
 from __future__ import unicode_literals

 from . import Node
 from ..compat import str, bytes, htmlentitydefs

 __all__ = ["HTMLEntity"]

@@ -50,10 +51,10 @@ class HTMLEntity(Node):

    def __unicode__(self):
        if self.named:
            return u"&{0};".format(self.value)
            return "&{0};".format(self.value)
        if self.hexadecimal:
            return u"&#{0}{1};".format(self.hex_char, self.value)
        return u"&#{0};".format(self.value)
            return "&#{0}{1};".format(self.hex_char, self.value)
        return "&#{0};".format(self.value)

    def __strip__(self, normalize, collapse):
        if normalize:
@@ -71,7 +72,7 @@ class HTMLEntity(Node):
        except ValueError:
            # Test whether we're on the wide or narrow Python build. Check the
            # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY):
            if len(u"\U0001F64A") == 2:
            if len("\U0001F64A") == 2:
                # Ensure this is within the range we can encode:
                if value > 0x10FFFF:
                    raise ValueError("unichr() arg not in range(0x110000)")
--- a/mwparserfromhell/nodes/tag.py
+++ b/mwparserfromhell/nodes/tag.py
@@ -20,7 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 from . import Node, Text
 from ..compat import str, bytes

 __all__ = ["Tag"]

@@ -92,7 +95,7 @@ class Tag(Node):

        result = "<" + unicode(self.tag)
        if self.attrs:
            result += " " + u" ".join([unicode(attr) for attr in self.attrs])
            result += " " + " ".join([unicode(attr) for attr in self.attrs])
        if self.self_closing:
            result += " " * self.open_padding + "/>"
        else:
--- a/mwparserfromhell/nodes/template.py
+++ b/mwparserfromhell/nodes/template.py
@@ -20,12 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals
 from collections import defaultdict
 import re

 from . import HTMLEntity, Node, Text
 from .extras import Parameter
 from ..utils import parse_anything
 from ..compat import str, bytes, basestring

 __all__ = ["Template"]

@@ -42,10 +44,10 @@ class Template(Node):

    def __unicode__(self):
        if self.params:
            params = u"|".join([unicode(param) for param in self.params])
            return "{{" + unicode(self.name) + "|" + params + "}}"
            params = "|".join([str(param) for param in self.params])
            return "{{" + str(self.name) + "|" + params + "}}"
        else:
            return "{{" + unicode(self.name) + "}}"
            return "{{" + str(self.name) + "}}"

    def __iternodes__(self, getter):
        yield None, self
@@ -77,7 +79,7 @@ class Template(Node):
                code.replace(node, node.replace(char, replacement))

    def _blank_param_value(self, value):
        match = re.search(r"^(\s*).*?(\s*)$", unicode(value), FLAGS)
        match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
        value.nodes = [Text(match.group(1)), Text(match.group(2))]

    def _select_theory(self, theories):
@@ -85,13 +87,13 @@ class Template(Node):
            best = max(theories.values())
            confidence = float(best) / sum(theories.values())
            if confidence > 0.75:
                return theories.keys()[theories.values().index(best)]
                return tuple(theories.keys())[tuple(theories.values()).index(best)]

    def _get_spacing_conventions(self):
        before_theories = defaultdict(lambda: 0)
        after_theories = defaultdict(lambda: 0)
        for param in self.params:
            match = re.search(r"^(\s*).*?(\s*)$", unicode(param.value), FLAGS)
            match = re.search(r"^(\s*).*?(\s*)$", str(param.value), FLAGS)
            before, after = match.group(1), match.group(2)
            before_theories[before] += 1
            after_theories[after] += 1
@@ -128,7 +130,7 @@ class Template(Node):
        self._name = parse_anything(value)

    def has_param(self, name, ignore_empty=True):
        name = name.strip() if isinstance(name, basestring) else unicode(name)
        name = name.strip() if isinstance(name, basestring) else str(name)
        for param in self.params:
            if param.name.strip() == name:
                if ignore_empty and not param.value.strip():
@@ -137,7 +139,7 @@ class Template(Node):
        return False

    def get(self, name):
        name = name.strip() if isinstance(name, basestring) else unicode(name)
        name = name.strip() if isinstance(name, basestring) else str(name)
        for param in reversed(self.params):
            if param.name.strip() == name:
                return param
@@ -163,7 +165,7 @@ class Template(Node):

        if showkey is None:
            try:
                int_name = int(unicode(name))
                int_name = int(str(name))
            except ValueError:
                showkey = True
            else:
@@ -171,7 +173,7 @@ class Template(Node):
                for param in self.params:
                    if not param.showkey:
                        if re.match(r"[1-9][0-9]*$", param.name.strip()):
                            int_keys.add(int(unicode(param.name)))
                            int_keys.add(int(str(param.name)))
                expected = min(set(range(1, len(int_keys) + 2)) - int_keys)
                if expected == int_name:
                    showkey = False
@@ -192,7 +194,7 @@ class Template(Node):
        return param

    def remove(self, name, keep_field=False, force_no_field=False):
        name = name.strip() if isinstance(name, basestring) else unicode(name)
        name = name.strip() if isinstance(name, basestring) else str(name)
        removed = False
        for i, param in enumerate(self.params):
            if param.name.strip() == name:
--- a/mwparserfromhell/nodes/text.py
+++ b/mwparserfromhell/nodes/text.py
@@ -20,7 +20,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 from . import Node
 from ..compat import str, bytes, basestring

 __all__ = ["Text"]

@@ -30,7 +33,7 @@ class Text(Node):
        self._value = value

    def __unicode__(self):
        return unicode(self.value)
        return str(self.value)

    def __strip__(self, normalize, collapse):
        return self
--- a/mwparserfromhell/parser/builder.py
+++ b/mwparserfromhell/parser/builder.py
@@ -20,11 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 from . import tokens
 from ..nodes import Heading, HTMLEntity, Tag, Template, Text
 from ..nodes.extras import Attribute, Parameter
 from ..smart_list import SmartList
 from ..wikicode import Wikicode
 from ..compat import str, bytes

 __all__ = ["Builder"]

@@ -62,7 +65,7 @@ class Builder(object):
                self._tokens.append(token)
                value = self._pop()
                if not key:
                    key = self._wrap([Text(unicode(default))])
                    key = self._wrap([Text(str(default))])
                return Parameter(key, value, showkey)
            else:
                self._write(self._handle_token(token))
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -20,13 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 import htmlentitydefs
 from __future__ import unicode_literals
 from math import log
 import re
 import string

 from . import contexts
 from . import tokens
 from ..compat import htmlentitydefs

 __all__ = ["Tokenizer"]

--- a/mwparserfromhell/parser/tokens.py
+++ b/mwparserfromhell/parser/tokens.py
@@ -20,6 +20,9 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals
 from ..compat import str, bytes, v

 __all__ = ["Token"]

 class Token(object):
@@ -33,7 +36,7 @@ class Token(object):
                args.append(key + "=" + repr(value[:97] + "..."))
            else:
                args.append(key + "=" + repr(value))
        return u"{0}({1})".format(type(self).__name__, u", ".join(args))
        return "{0}({1})".format(type(self).__name__, ", ".join(args))

    def __eq__(self, other):
        if isinstance(other, type(self)):
@@ -49,10 +52,15 @@ class Token(object):
    def __delattr__(self, key):
        del self._kwargs[key]


 def make(name):
    __all__.append(name)
    return type(name, (Token,), {})
 if v >= 3:
    def make(name):
        __all__.append(name)
        return type(name, (Token,), {})
 else:
    def make(name):
        name = name.encode("utf-8")
        __all__.append(name)
        return type(name, (Token,), {})

 Text = make("Text")

--- a/mwparserfromhell/string_mixin.py
+++ b/mwparserfromhell/string_mixin.py
@@ -20,230 +20,240 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals
 from .compat import str, bytes, v

 __all__ = ["StringMixIn"]

 def inheritdoc(method):
    method.__doc__ = getattr(unicode, method.func_name).__doc__
    try:
        method.__doc__ = getattr(str, method.__name__).__doc__
    except AttributeError:
        method.__doc__ = "This feature is only available on Python 2."
    return method


 class StringMixIn(object):
    def __str__(self):
        return unicode(self).encode("utf8")
    if v >= 3:
      def __str__(self):
          return self.__unicode__()
    else:
        def __str__(self):
            return self.__unicode__().encode("utf8")

    def __repr__(self):
        return repr(unicode(self))
        return repr(self.__unicode__())

    def __lt__(self, other):
        if isinstance(other, StringMixIn):
            return unicode(self) < unicode(other)
        return unicode(self) < other
            return self.__unicode__() < other.__unicode__()
        return self.__unicode__() < other

    def __le__(self, other):
        if isinstance(other, StringMixIn):
            return unicode(self) <= unicode(other)
        return unicode(self) <= other
            return self.__unicode__() <= other.__unicode__()
        return self.__unicode__() <= other

    def __eq__(self, other):
        if isinstance(other, StringMixIn):
            return unicode(self) == unicode(other)
        return unicode(self) == other
            return self.__unicode__() == other.__unicode__()
        return self.__unicode__() == other

    def __ne__(self, other):
        if isinstance(other, StringMixIn):
            return unicode(self) != unicode(other)
        return unicode(self) != other
            return self.__unicode__() != other.__unicode__()
        return self.__unicode__() != other

    def __gt__(self, other):
        if isinstance(other, StringMixIn):
            return unicode(self) > unicode(other)
        return unicode(self) > other
            return self.__unicode__() > other.__unicode__()
        return self.__unicode__() > other

    def __ge__(self, other):
        if isinstance(other, StringMixIn):
            return unicode(self) >= unicode(other)
        return unicode(self) >= other
            return self.__unicode__() >= other.__unicode__()
        return self.__unicode__() >= other

    def __nonzero__(self):
        return bool(unicode(self))
        return bool(self.__unicode__())

    def __unicode__(self):
        raise NotImplementedError()

    def __len__(self):
        return len(unicode(self))
        return len(self.__unicode__())

    def __iter__(self):
        for char in unicode(self):
        for char in self.__unicode__():
            yield char

    def __getitem__(self, key):
        return unicode(self)[key]
        return self.__unicode__()[key]

    def __contains__(self, item):
        if isinstance(item, StringMixIn):
            return unicode(item) in unicode(self)
        return item in unicode(self)
            return unicode(item) in self.__unicode__()
        return item in self.__unicode__()

    @inheritdoc
    def capitalize(self):
        return unicode(self).capitalize()
        return self.__unicode__().capitalize()

    @inheritdoc
    def center(self, width, fillchar=None):
        return unicode(self).center(width, fillchar)
        return self.__unicode__().center(width, fillchar)

    @inheritdoc
    def count(self, sub=None, start=None, end=None):
        return unicode(self).count(sub, start, end)
        return self.__unicode__().count(sub, start, end)

    @inheritdoc
    def decode(self, encoding=None, errors=None):
        return unicode(self).decode(encoding, errors)
        return self.__unicode__().decode(encoding, errors)

    @inheritdoc
    def encode(self, encoding=None, errors=None):
        return unicode(self).encode(encoding, errors)
        return self.__unicode__().encode(encoding, errors)

    @inheritdoc
    def endswith(self, prefix, start=None, end=None):
        return unicode(self).endswith(prefix, start, end)
        return self.__unicode__().endswith(prefix, start, end)

    @inheritdoc
    def expandtabs(self, tabsize=None):
        return unicode(self).expandtabs(tabsize)
        return self.__unicode__().expandtabs(tabsize)

    @inheritdoc
    def find(self, sub=None, start=None, end=None):
        return unicode(self).find(sub, start, end)
        return self.__unicode__().find(sub, start, end)

    @inheritdoc
    def format(self, *args, **kwargs):
        return unicode(self).format(*args, **kwargs)
        return self.__unicode__().format(*args, **kwargs)

    @inheritdoc
    def index(self, sub=None, start=None, end=None):
        return unicode(self).index(sub, start, end)
        return self.__unicode__().index(sub, start, end)

    @inheritdoc
    def isalnum(self):
        return unicode(self).isalnum()
        return self.__unicode__().isalnum()

    @inheritdoc
    def isalpha(self):
        return unicode(self).isalpha()
        return self.__unicode__().isalpha()

    @inheritdoc
    def isdecimal(self):
        return unicode(self).isdecimal()
        return self.__unicode__().isdecimal()

    @inheritdoc
    def isdigit(self):
        return unicode(self).isdigit()
        return self.__unicode__().isdigit()

    @inheritdoc
    def islower(self):
        return unicode(self).islower()
        return self.__unicode__().islower()

    @inheritdoc
    def isnumeric(self):
        return unicode(self).isnumeric()
        return self.__unicode__().isnumeric()

    @inheritdoc
    def isspace(self):
        return unicode(self).isspace()
        return self.__unicode__().isspace()

    @inheritdoc
    def istitle(self):
        return unicode(self).istitle()
        return self.__unicode__().istitle()

    @inheritdoc
    def isupper(self):
        return unicode(self).isupper()
        return self.__unicode__().isupper()

    @inheritdoc
    def join(self, iterable):
        return unicode(self).join(iterable)
        return self.__unicode__().join(iterable)

    @inheritdoc
    def ljust(self, width, fillchar=None):
        return unicode(self).ljust(width, fillchar)
        return self.__unicode__().ljust(width, fillchar)

    @inheritdoc
    def lower(self):
        return unicode(self).lower()
        return self.__unicode__().lower()

    @inheritdoc
    def lstrip(self, chars=None):
        return unicode(self).lstrip(chars)
        return self.__unicode__().lstrip(chars)

    @inheritdoc
    def partition(self, sep):
        return unicode(self).partition(sep)
        return self.__unicode__().partition(sep)

    @inheritdoc
    def replace(self, old, new, count):
        return unicode(self).replace(old, new, count)
        return self.__unicode__().replace(old, new, count)

    @inheritdoc
    def rfind(self, sub=None, start=None, end=None):
        return unicode(self).rfind(sub, start, end)
        return self.__unicode__().rfind(sub, start, end)

    @inheritdoc
    def rindex(self, sub=None, start=None, end=None):
        return unicode(self).rindex(sub, start, end)
        return self.__unicode__().rindex(sub, start, end)

    @inheritdoc
    def rjust(self, width, fillchar=None):
        return unicode(self).rjust(width, fillchar)
        return self.__unicode__().rjust(width, fillchar)

    @inheritdoc
    def rpartition(self, sep):
        return unicode(self).rpartition(sep)
        return self.__unicode__().rpartition(sep)

    @inheritdoc
    def rsplit(self, sep=None, maxsplit=None):
        return unicode(self).rsplit(sep, maxsplit)
        return self.__unicode__().rsplit(sep, maxsplit)

    @inheritdoc
    def rstrip(self, chars=None):
        return unicode(self).rstrip(chars)
        return self.__unicode__().rstrip(chars)

    @inheritdoc
    def split(self, sep=None, maxsplit=None):
        return unicode(self).split(sep, maxsplit)
        return self.__unicode__().split(sep, maxsplit)

    @inheritdoc
    def splitlines(self, keepends=None):
        return unicode(self).splitlines(keepends)
        return self.__unicode__().splitlines(keepends)

    @inheritdoc
    def startswith(self, prefix, start=None, end=None):
        return unicode(self).startswith(prefix, start, end)
        return self.__unicode__().startswith(prefix, start, end)

    @inheritdoc
    def strip(self, chars=None):
        return unicode(self).strip(chars)
        return self.__unicode__().strip(chars)

    @inheritdoc
    def swapcase(self):
        return unicode(self).swapcase()
        return self.__unicode__().swapcase()

    @inheritdoc
    def title(self):
        return unicode(self).title()
        return self.__unicode__().title()

    @inheritdoc
    def translate(self, table, deletechars=None):
        return unicode(self).translate(table, deletechars)
        return self.__unicode__().translate(table, deletechars)

    @inheritdoc
    def upper(self):
        return unicode(self).upper()
        return self.__unicode__().upper()

    @inheritdoc
    def zfill(self, width):
        return unicode(self).zfill(width)
        return self.__unicode__().zfill(width)


 del inheritdoc
--- a/mwparserfromhell/utils.py
+++ b/mwparserfromhell/utils.py
@@ -20,9 +20,12 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals

 import mwparserfromhell
 from .nodes import Node
 from .smart_list import SmartList
 from .compat import str, bytes, basestring

 def parse_anything(value):
    wikicode = mwparserfromhell.wikicode.Wikicode
@@ -33,7 +36,7 @@ def parse_anything(value):
    if isinstance(value, basestring):
        return mwparserfromhell.parse(value)
    if isinstance(value, int):
        return mwparserfromhell.parse(unicode(value))
        return mwparserfromhell.parse(str(value))
    if value is None:
        return wikicode(SmartList())
    try:
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -20,12 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

 from __future__ import unicode_literals
 import re
 import sys

 from .nodes import Heading, Node, Tag, Template, Text
 from .string_mixin import StringMixIn
 from .utils import parse_anything
 from .compat import str, bytes

 __all__ = ["Wikicode"]

@@ -40,7 +42,7 @@ class Wikicode(StringMixIn):
        self._nodes = nodes

    def __unicode__(self):
        return "".join([unicode(node) for node in self.nodes])
        return "".join([str(node) for node in self.nodes])

    def _get_children(self, node):
        """Iterate over all descendants of a given node, including itself.
@@ -193,7 +195,7 @@ class Wikicode(StringMixIn):
            nodes = self.nodes
        for node in nodes:
            if not forcetype or isinstance(node, forcetype):
                if not matches or re.search(matches, unicode(node), flags):
                if not matches or re.search(matches, str(node), flags):
                    yield node

    def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
@@ -251,15 +253,15 @@ class Wikicode(StringMixIn):
        for node in self.nodes:
            stripped = node.__strip__(normalize, collapse)
            if stripped:
                nodes.append(unicode(stripped))
                nodes.append(str(stripped))

        if collapse:
            stripped = u"".join(nodes).strip("\n")
            stripped = "".join(nodes).strip("\n")
            while "\n\n\n" in stripped:
                stripped = stripped.replace("\n\n\n", "\n\n")
            return stripped
        else:
            return u"".join(nodes)
            return "".join(nodes)

    def get_tree(self):
        marker = object()  # Random object we can find with certainty in a list