Browse Source

Adding Python 3 support

tags/v0.1
Riamse 11 years ago
parent
commit
2eb76e7de0
11 changed files with 130 additions and 93 deletions
  1. +1
    -0
      .gitignore
  2. +6
    -5
      mwparserfromhell/nodes/html_entity.py
  3. +4
    -1
      mwparserfromhell/nodes/tag.py
  4. +13
    -11
      mwparserfromhell/nodes/template.py
  5. +4
    -1
      mwparserfromhell/nodes/text.py
  6. +4
    -1
      mwparserfromhell/parser/builder.py
  7. +2
    -1
      mwparserfromhell/parser/tokenizer.py
  8. +13
    -5
      mwparserfromhell/parser/tokens.py
  9. +72
    -62
      mwparserfromhell/string_mixin.py
  10. +4
    -1
      mwparserfromhell/utils.py
  11. +7
    -5
      mwparserfromhell/wikicode.py

+ 1
- 0
.gitignore View File

@@ -2,5 +2,6 @@
*.egg *.egg
*.egg-info *.egg-info
.DS_Store .DS_Store
__pycache__
build build
docs/_build docs/_build

+ 6
- 5
mwparserfromhell/nodes/html_entity.py View File

@@ -20,9 +20,10 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


import htmlentitydefs
from __future__ import unicode_literals


from . import Node from . import Node
from ..compat import str, bytes, htmlentitydefs


__all__ = ["HTMLEntity"] __all__ = ["HTMLEntity"]


@@ -50,10 +51,10 @@ class HTMLEntity(Node):


def __unicode__(self): def __unicode__(self):
if self.named: if self.named:
return u"&{0};".format(self.value)
return "&{0};".format(self.value)
if self.hexadecimal: if self.hexadecimal:
return u"&#{0}{1};".format(self.hex_char, self.value)
return u"&#{0};".format(self.value)
return "&#{0}{1};".format(self.hex_char, self.value)
return "&#{0};".format(self.value)


def __strip__(self, normalize, collapse): def __strip__(self, normalize, collapse):
if normalize: if normalize:
@@ -71,7 +72,7 @@ class HTMLEntity(Node):
except ValueError: except ValueError:
# Test whether we're on the wide or narrow Python build. Check the # Test whether we're on the wide or narrow Python build. Check the
# length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY):
if len(u"\U0001F64A") == 2:
if len("\U0001F64A") == 2:
# Ensure this is within the range we can encode: # Ensure this is within the range we can encode:
if value > 0x10FFFF: if value > 0x10FFFF:
raise ValueError("unichr() arg not in range(0x110000)") raise ValueError("unichr() arg not in range(0x110000)")


+ 4
- 1
mwparserfromhell/nodes/tag.py View File

@@ -20,7 +20,10 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals

from . import Node, Text from . import Node, Text
from ..compat import str, bytes


__all__ = ["Tag"] __all__ = ["Tag"]


@@ -92,7 +95,7 @@ class Tag(Node):


result = "<" + unicode(self.tag) result = "<" + unicode(self.tag)
if self.attrs: if self.attrs:
result += " " + u" ".join([unicode(attr) for attr in self.attrs])
result += " " + " ".join([unicode(attr) for attr in self.attrs])
if self.self_closing: if self.self_closing:
result += " " * self.open_padding + "/>" result += " " * self.open_padding + "/>"
else: else:


+ 13
- 11
mwparserfromhell/nodes/template.py View File

@@ -20,12 +20,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals
from collections import defaultdict from collections import defaultdict
import re import re


from . import HTMLEntity, Node, Text from . import HTMLEntity, Node, Text
from .extras import Parameter from .extras import Parameter
from ..utils import parse_anything from ..utils import parse_anything
from ..compat import str, bytes, basestring


__all__ = ["Template"] __all__ = ["Template"]


@@ -42,10 +44,10 @@ class Template(Node):


def __unicode__(self): def __unicode__(self):
if self.params: if self.params:
params = u"|".join([unicode(param) for param in self.params])
return "{{" + unicode(self.name) + "|" + params + "}}"
params = "|".join([str(param) for param in self.params])
return "{{" + str(self.name) + "|" + params + "}}"
else: else:
return "{{" + unicode(self.name) + "}}"
return "{{" + str(self.name) + "}}"


def __iternodes__(self, getter): def __iternodes__(self, getter):
yield None, self yield None, self
@@ -77,7 +79,7 @@ class Template(Node):
code.replace(node, node.replace(char, replacement)) code.replace(node, node.replace(char, replacement))


def _blank_param_value(self, value): def _blank_param_value(self, value):
match = re.search(r"^(\s*).*?(\s*)$", unicode(value), FLAGS)
match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
value.nodes = [Text(match.group(1)), Text(match.group(2))] value.nodes = [Text(match.group(1)), Text(match.group(2))]


def _select_theory(self, theories): def _select_theory(self, theories):
@@ -85,13 +87,13 @@ class Template(Node):
best = max(theories.values()) best = max(theories.values())
confidence = float(best) / sum(theories.values()) confidence = float(best) / sum(theories.values())
if confidence > 0.75: if confidence > 0.75:
return theories.keys()[theories.values().index(best)]
return tuple(theories.keys())[tuple(theories.values()).index(best)]


def _get_spacing_conventions(self): def _get_spacing_conventions(self):
before_theories = defaultdict(lambda: 0) before_theories = defaultdict(lambda: 0)
after_theories = defaultdict(lambda: 0) after_theories = defaultdict(lambda: 0)
for param in self.params: for param in self.params:
match = re.search(r"^(\s*).*?(\s*)$", unicode(param.value), FLAGS)
match = re.search(r"^(\s*).*?(\s*)$", str(param.value), FLAGS)
before, after = match.group(1), match.group(2) before, after = match.group(1), match.group(2)
before_theories[before] += 1 before_theories[before] += 1
after_theories[after] += 1 after_theories[after] += 1
@@ -124,7 +126,7 @@ class Template(Node):
return self._params return self._params


def has_param(self, name, ignore_empty=True): def has_param(self, name, ignore_empty=True):
name = name.strip() if isinstance(name, basestring) else unicode(name)
name = name.strip() if isinstance(name, basestring) else str(name)
for param in self.params: for param in self.params:
if param.name.strip() == name: if param.name.strip() == name:
if ignore_empty and not param.value.strip(): if ignore_empty and not param.value.strip():
@@ -133,7 +135,7 @@ class Template(Node):
return False return False


def get(self, name): def get(self, name):
name = name.strip() if isinstance(name, basestring) else unicode(name)
name = name.strip() if isinstance(name, basestring) else str(name)
for param in reversed(self.params): for param in reversed(self.params):
if param.name.strip() == name: if param.name.strip() == name:
return param return param
@@ -159,7 +161,7 @@ class Template(Node):


if showkey is None: if showkey is None:
try: try:
int_name = int(unicode(name))
int_name = int(str(name))
except ValueError: except ValueError:
showkey = True showkey = True
else: else:
@@ -167,7 +169,7 @@ class Template(Node):
for param in self.params: for param in self.params:
if not param.showkey: if not param.showkey:
if re.match(r"[1-9][0-9]*$", param.name.strip()): if re.match(r"[1-9][0-9]*$", param.name.strip()):
int_keys.add(int(unicode(param.name)))
int_keys.add(int(str(param.name)))
expected = min(set(range(1, len(int_keys) + 2)) - int_keys) expected = min(set(range(1, len(int_keys) + 2)) - int_keys)
if expected == int_name: if expected == int_name:
showkey = False showkey = False
@@ -188,7 +190,7 @@ class Template(Node):
return param return param


def remove(self, name, keep_field=False, force_no_field=False): def remove(self, name, keep_field=False, force_no_field=False):
name = name.strip() if isinstance(name, basestring) else unicode(name)
name = name.strip() if isinstance(name, basestring) else str(name)
removed = False removed = False
for i, param in enumerate(self.params): for i, param in enumerate(self.params):
if param.name.strip() == name: if param.name.strip() == name:


+ 4
- 1
mwparserfromhell/nodes/text.py View File

@@ -20,7 +20,10 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals

from . import Node from . import Node
from ..compat import str, bytes, basestring


__all__ = ["Text"] __all__ = ["Text"]


@@ -30,7 +33,7 @@ class Text(Node):
self._value = value self._value = value


def __unicode__(self): def __unicode__(self):
return unicode(self.value)
return str(self.value)


def __strip__(self, normalize, collapse): def __strip__(self, normalize, collapse):
return self return self


+ 4
- 1
mwparserfromhell/parser/builder.py View File

@@ -20,11 +20,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals

from . import tokens from . import tokens
from ..nodes import Heading, HTMLEntity, Tag, Template, Text from ..nodes import Heading, HTMLEntity, Tag, Template, Text
from ..nodes.extras import Attribute, Parameter from ..nodes.extras import Attribute, Parameter
from ..smart_list import SmartList from ..smart_list import SmartList
from ..wikicode import Wikicode from ..wikicode import Wikicode
from ..compat import str, bytes


__all__ = ["Builder"] __all__ = ["Builder"]


@@ -62,7 +65,7 @@ class Builder(object):
self._tokens.append(token) self._tokens.append(token)
value = self._pop() value = self._pop()
if not key: if not key:
key = self._wrap([Text(unicode(default))])
key = self._wrap([Text(str(default))])
return Parameter(key, value, showkey) return Parameter(key, value, showkey)
else: else:
self._write(self._handle_token(token)) self._write(self._handle_token(token))


+ 2
- 1
mwparserfromhell/parser/tokenizer.py View File

@@ -20,13 +20,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


import htmlentitydefs
from __future__ import unicode_literals
from math import log from math import log
import re import re
import string import string


from . import contexts from . import contexts
from . import tokens from . import tokens
from ..compat import htmlentitydefs


__all__ = ["Tokenizer"] __all__ = ["Tokenizer"]




+ 13
- 5
mwparserfromhell/parser/tokens.py View File

@@ -20,6 +20,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals
from ..compat import str, bytes, v

__all__ = ["Token"] __all__ = ["Token"]


class Token(object): class Token(object):
@@ -33,7 +36,7 @@ class Token(object):
args.append(key + "=" + repr(value[:97] + "...")) args.append(key + "=" + repr(value[:97] + "..."))
else: else:
args.append(key + "=" + repr(value)) args.append(key + "=" + repr(value))
return u"{0}({1})".format(type(self).__name__, u", ".join(args))
return "{0}({1})".format(type(self).__name__, ", ".join(args))


def __eq__(self, other): def __eq__(self, other):
if isinstance(other, type(self)): if isinstance(other, type(self)):
@@ -49,10 +52,15 @@ class Token(object):
def __delattr__(self, key): def __delattr__(self, key):
del self._kwargs[key] del self._kwargs[key]



def make(name):
__all__.append(name)
return type(name, (Token,), {})
if v >= 3:
def make(name):
__all__.append(name)
return type(name, (Token,), {})
else:
def make(name):
name = name.encode("utf-8")
__all__.append(name)
return type(name, (Token,), {})


Text = make("Text") Text = make("Text")




+ 72
- 62
mwparserfromhell/string_mixin.py View File

@@ -20,230 +20,240 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals
from .compat import str, bytes, v

__all__ = ["StringMixIn"] __all__ = ["StringMixIn"]


def inheritdoc(method): def inheritdoc(method):
method.__doc__ = getattr(unicode, method.func_name).__doc__
try:
method.__doc__ = getattr(str, method.__name__).__doc__
except AttributeError:
method.__doc__ = "This feature is only available on Python 2."
return method return method




class StringMixIn(object): class StringMixIn(object):
def __str__(self):
return unicode(self).encode("utf8")
if v >= 3:
def __str__(self):
return self.__unicode__()
else:
def __str__(self):
return self.__unicode__().encode("utf8")


def __repr__(self): def __repr__(self):
return repr(unicode(self))
return repr(self.__unicode__())


def __lt__(self, other): def __lt__(self, other):
if isinstance(other, StringMixIn): if isinstance(other, StringMixIn):
return unicode(self) < unicode(other)
return unicode(self) < other
return self.__unicode__() < other.__unicode__()
return self.__unicode__() < other


def __le__(self, other): def __le__(self, other):
if isinstance(other, StringMixIn): if isinstance(other, StringMixIn):
return unicode(self) <= unicode(other)
return unicode(self) <= other
return self.__unicode__() <= other.__unicode__()
return self.__unicode__() <= other


def __eq__(self, other): def __eq__(self, other):
if isinstance(other, StringMixIn): if isinstance(other, StringMixIn):
return unicode(self) == unicode(other)
return unicode(self) == other
return self.__unicode__() == other.__unicode__()
return self.__unicode__() == other


def __ne__(self, other): def __ne__(self, other):
if isinstance(other, StringMixIn): if isinstance(other, StringMixIn):
return unicode(self) != unicode(other)
return unicode(self) != other
return self.__unicode__() != other.__unicode__()
return self.__unicode__() != other


def __gt__(self, other): def __gt__(self, other):
if isinstance(other, StringMixIn): if isinstance(other, StringMixIn):
return unicode(self) > unicode(other)
return unicode(self) > other
return self.__unicode__() > other.__unicode__()
return self.__unicode__() > other


def __ge__(self, other): def __ge__(self, other):
if isinstance(other, StringMixIn): if isinstance(other, StringMixIn):
return unicode(self) >= unicode(other)
return unicode(self) >= other
return self.__unicode__() >= other.__unicode__()
return self.__unicode__() >= other


def __nonzero__(self): def __nonzero__(self):
return bool(unicode(self))
return bool(self.__unicode__())


def __unicode__(self): def __unicode__(self):
raise NotImplementedError() raise NotImplementedError()


def __len__(self): def __len__(self):
return len(unicode(self))
return len(self.__unicode__())


def __iter__(self): def __iter__(self):
for char in unicode(self):
for char in self.__unicode__():
yield char yield char


def __getitem__(self, key): def __getitem__(self, key):
return unicode(self)[key]
return self.__unicode__()[key]


def __contains__(self, item): def __contains__(self, item):
if isinstance(item, StringMixIn): if isinstance(item, StringMixIn):
return unicode(item) in unicode(self)
return item in unicode(self)
return unicode(item) in self.__unicode__()
return item in self.__unicode__()


@inheritdoc @inheritdoc
def capitalize(self): def capitalize(self):
return unicode(self).capitalize()
return self.__unicode__().capitalize()


@inheritdoc @inheritdoc
def center(self, width, fillchar=None): def center(self, width, fillchar=None):
return unicode(self).center(width, fillchar)
return self.__unicode__().center(width, fillchar)


@inheritdoc @inheritdoc
def count(self, sub=None, start=None, end=None): def count(self, sub=None, start=None, end=None):
return unicode(self).count(sub, start, end)
return self.__unicode__().count(sub, start, end)


@inheritdoc @inheritdoc
def decode(self, encoding=None, errors=None): def decode(self, encoding=None, errors=None):
return unicode(self).decode(encoding, errors)
return self.__unicode__().decode(encoding, errors)


@inheritdoc @inheritdoc
def encode(self, encoding=None, errors=None): def encode(self, encoding=None, errors=None):
return unicode(self).encode(encoding, errors)
return self.__unicode__().encode(encoding, errors)


@inheritdoc @inheritdoc
def endswith(self, prefix, start=None, end=None): def endswith(self, prefix, start=None, end=None):
return unicode(self).endswith(prefix, start, end)
return self.__unicode__().endswith(prefix, start, end)


@inheritdoc @inheritdoc
def expandtabs(self, tabsize=None): def expandtabs(self, tabsize=None):
return unicode(self).expandtabs(tabsize)
return self.__unicode__().expandtabs(tabsize)


@inheritdoc @inheritdoc
def find(self, sub=None, start=None, end=None): def find(self, sub=None, start=None, end=None):
return unicode(self).find(sub, start, end)
return self.__unicode__().find(sub, start, end)


@inheritdoc @inheritdoc
def format(self, *args, **kwargs): def format(self, *args, **kwargs):
return unicode(self).format(*args, **kwargs)
return self.__unicode__().format(*args, **kwargs)


@inheritdoc @inheritdoc
def index(self, sub=None, start=None, end=None): def index(self, sub=None, start=None, end=None):
return unicode(self).index(sub, start, end)
return self.__unicode__().index(sub, start, end)


@inheritdoc @inheritdoc
def isalnum(self): def isalnum(self):
return unicode(self).isalnum()
return self.__unicode__().isalnum()


@inheritdoc @inheritdoc
def isalpha(self): def isalpha(self):
return unicode(self).isalpha()
return self.__unicode__().isalpha()


@inheritdoc @inheritdoc
def isdecimal(self): def isdecimal(self):
return unicode(self).isdecimal()
return self.__unicode__().isdecimal()


@inheritdoc @inheritdoc
def isdigit(self): def isdigit(self):
return unicode(self).isdigit()
return self.__unicode__().isdigit()


@inheritdoc @inheritdoc
def islower(self): def islower(self):
return unicode(self).islower()
return self.__unicode__().islower()


@inheritdoc @inheritdoc
def isnumeric(self): def isnumeric(self):
return unicode(self).isnumeric()
return self.__unicode__().isnumeric()


@inheritdoc @inheritdoc
def isspace(self): def isspace(self):
return unicode(self).isspace()
return self.__unicode__().isspace()


@inheritdoc @inheritdoc
def istitle(self): def istitle(self):
return unicode(self).istitle()
return self.__unicode__().istitle()


@inheritdoc @inheritdoc
def isupper(self): def isupper(self):
return unicode(self).isupper()
return self.__unicode__().isupper()


@inheritdoc @inheritdoc
def join(self, iterable): def join(self, iterable):
return unicode(self).join(iterable)
return self.__unicode__().join(iterable)


@inheritdoc @inheritdoc
def ljust(self, width, fillchar=None): def ljust(self, width, fillchar=None):
return unicode(self).ljust(width, fillchar)
return self.__unicode__().ljust(width, fillchar)


@inheritdoc @inheritdoc
def lower(self): def lower(self):
return unicode(self).lower()
return self.__unicode__().lower()


@inheritdoc @inheritdoc
def lstrip(self, chars=None): def lstrip(self, chars=None):
return unicode(self).lstrip(chars)
return self.__unicode__().lstrip(chars)


@inheritdoc @inheritdoc
def partition(self, sep): def partition(self, sep):
return unicode(self).partition(sep)
return self.__unicode__().partition(sep)


@inheritdoc @inheritdoc
def replace(self, old, new, count): def replace(self, old, new, count):
return unicode(self).replace(old, new, count)
return self.__unicode__().replace(old, new, count)


@inheritdoc @inheritdoc
def rfind(self, sub=None, start=None, end=None): def rfind(self, sub=None, start=None, end=None):
return unicode(self).rfind(sub, start, end)
return self.__unicode__().rfind(sub, start, end)


@inheritdoc @inheritdoc
def rindex(self, sub=None, start=None, end=None): def rindex(self, sub=None, start=None, end=None):
return unicode(self).rindex(sub, start, end)
return self.__unicode__().rindex(sub, start, end)


@inheritdoc @inheritdoc
def rjust(self, width, fillchar=None): def rjust(self, width, fillchar=None):
return unicode(self).rjust(width, fillchar)
return self.__unicode__().rjust(width, fillchar)


@inheritdoc @inheritdoc
def rpartition(self, sep): def rpartition(self, sep):
return unicode(self).rpartition(sep)
return self.__unicode__().rpartition(sep)


@inheritdoc @inheritdoc
def rsplit(self, sep=None, maxsplit=None): def rsplit(self, sep=None, maxsplit=None):
return unicode(self).rsplit(sep, maxsplit)
return self.__unicode__().rsplit(sep, maxsplit)


@inheritdoc @inheritdoc
def rstrip(self, chars=None): def rstrip(self, chars=None):
return unicode(self).rstrip(chars)
return self.__unicode__().rstrip(chars)


@inheritdoc @inheritdoc
def split(self, sep=None, maxsplit=None): def split(self, sep=None, maxsplit=None):
return unicode(self).split(sep, maxsplit)
return self.__unicode__().split(sep, maxsplit)


@inheritdoc @inheritdoc
def splitlines(self, keepends=None): def splitlines(self, keepends=None):
return unicode(self).splitlines(keepends)
return self.__unicode__().splitlines(keepends)


@inheritdoc @inheritdoc
def startswith(self, prefix, start=None, end=None): def startswith(self, prefix, start=None, end=None):
return unicode(self).startswith(prefix, start, end)
return self.__unicode__().startswith(prefix, start, end)


@inheritdoc @inheritdoc
def strip(self, chars=None): def strip(self, chars=None):
return unicode(self).strip(chars)
return self.__unicode__().strip(chars)


@inheritdoc @inheritdoc
def swapcase(self): def swapcase(self):
return unicode(self).swapcase()
return self.__unicode__().swapcase()


@inheritdoc @inheritdoc
def title(self): def title(self):
return unicode(self).title()
return self.__unicode__().title()


@inheritdoc @inheritdoc
def translate(self, table, deletechars=None): def translate(self, table, deletechars=None):
return unicode(self).translate(table, deletechars)
return self.__unicode__().translate(table, deletechars)


@inheritdoc @inheritdoc
def upper(self): def upper(self):
return unicode(self).upper()
return self.__unicode__().upper()


@inheritdoc @inheritdoc
def zfill(self, width): def zfill(self, width):
return unicode(self).zfill(width)
return self.__unicode__().zfill(width)




del inheritdoc del inheritdoc

+ 4
- 1
mwparserfromhell/utils.py View File

@@ -20,9 +20,12 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals

import mwparserfromhell import mwparserfromhell
from .nodes import Node from .nodes import Node
from .smart_list import SmartList from .smart_list import SmartList
from .compat import str, bytes, basestring


def parse_anything(value): def parse_anything(value):
wikicode = mwparserfromhell.wikicode.Wikicode wikicode = mwparserfromhell.wikicode.Wikicode
@@ -33,7 +36,7 @@ def parse_anything(value):
if isinstance(value, basestring): if isinstance(value, basestring):
return mwparserfromhell.parse(value) return mwparserfromhell.parse(value)
if isinstance(value, int): if isinstance(value, int):
return mwparserfromhell.parse(unicode(value))
return mwparserfromhell.parse(str(value))
if value is None: if value is None:
return wikicode(SmartList()) return wikicode(SmartList())
try: try:


+ 7
- 5
mwparserfromhell/wikicode.py View File

@@ -20,12 +20,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals
import re import re
import sys import sys


from .nodes import Heading, Node, Tag, Template, Text from .nodes import Heading, Node, Tag, Template, Text
from .string_mixin import StringMixIn from .string_mixin import StringMixIn
from .utils import parse_anything from .utils import parse_anything
from .compat import str, bytes


__all__ = ["Wikicode"] __all__ = ["Wikicode"]


@@ -37,7 +39,7 @@ class Wikicode(StringMixIn):
self._nodes = nodes self._nodes = nodes


def __unicode__(self): def __unicode__(self):
return "".join([unicode(node) for node in self.nodes])
return "".join([str(node) for node in self.nodes])


def _get_children(self, node): def _get_children(self, node):
for context, child in node.__iternodes__(self._get_all_nodes): for context, child in node.__iternodes__(self._get_all_nodes):
@@ -171,7 +173,7 @@ class Wikicode(StringMixIn):
nodes = self.nodes nodes = self.nodes
for node in nodes: for node in nodes:
if not forcetype or isinstance(node, forcetype): if not forcetype or isinstance(node, forcetype):
if not matches or re.search(matches, unicode(node), flags):
if not matches or re.search(matches, str(node), flags):
yield node yield node


def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
@@ -229,15 +231,15 @@ class Wikicode(StringMixIn):
for node in self.nodes: for node in self.nodes:
stripped = node.__strip__(normalize, collapse) stripped = node.__strip__(normalize, collapse)
if stripped: if stripped:
nodes.append(unicode(stripped))
nodes.append(str(stripped))


if collapse: if collapse:
stripped = u"".join(nodes).strip("\n")
stripped = "".join(nodes).strip("\n")
while "\n\n\n" in stripped: while "\n\n\n" in stripped:
stripped = stripped.replace("\n\n\n", "\n\n") stripped = stripped.replace("\n\n\n", "\n\n")
return stripped return stripped
else: else:
return u"".join(nodes)
return "".join(nodes)


def get_tree(self): def get_tree(self):
marker = object() # Random object we can find with certainty in a list marker = object() # Random object we can find with certainty in a list


Loading…
Cancel
Save