@@ -2,5 +2,6 @@ | |||
*.egg | |||
*.egg-info | |||
.DS_Store | |||
__pycache__ | |||
build | |||
docs/_build |
@@ -20,9 +20,10 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import htmlentitydefs | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str, bytes, htmlentitydefs | |||
__all__ = ["HTMLEntity"] | |||
@@ -50,10 +51,10 @@ class HTMLEntity(Node): | |||
def __unicode__(self):
    """Return the entity as text.

    The form depends on the instance flags:

    - ``self.named``: ``&<value>;``
    - ``self.hexadecimal``: ``&#<hex_char><value>;``
    - otherwise: ``&#<value>;``
    """
    # Plain literals only: the module enables ``unicode_literals``, so the
    # old ``u"..."`` spellings were redundant duplicates of these returns.
    if self.named:
        return "&{0};".format(self.value)
    if self.hexadecimal:
        return "&#{0}{1};".format(self.hex_char, self.value)
    return "&#{0};".format(self.value)
def __strip__(self, normalize, collapse): | |||
if normalize: | |||
@@ -71,7 +72,7 @@ class HTMLEntity(Node): | |||
except ValueError: | |||
# Test whether we're on the wide or narrow Python build. Check the | |||
# length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): | |||
if len(u"\U0001F64A") == 2: | |||
if len("\U0001F64A") == 2: | |||
# Ensure this is within the range we can encode: | |||
if value > 0x10FFFF: | |||
raise ValueError("unichr() arg not in range(0x110000)") | |||
@@ -20,7 +20,10 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node, Text | |||
from ..compat import str, bytes | |||
__all__ = ["Tag"] | |||
@@ -92,7 +95,7 @@ class Tag(Node): | |||
result = "<" + unicode(self.tag) | |||
if self.attrs: | |||
result += " " + u" ".join([unicode(attr) for attr in self.attrs]) | |||
result += " " + " ".join([unicode(attr) for attr in self.attrs]) | |||
if self.self_closing: | |||
result += " " * self.open_padding + "/>" | |||
else: | |||
@@ -20,12 +20,14 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from collections import defaultdict | |||
import re | |||
from . import HTMLEntity, Node, Text | |||
from .extras import Parameter | |||
from ..utils import parse_anything | |||
from ..compat import str, bytes, basestring | |||
__all__ = ["Template"] | |||
@@ -42,10 +44,10 @@ class Template(Node): | |||
def __unicode__(self):
    """Return the template as text.

    With parameters the result is ``{{name|p1|p2|...}}``; without any it is
    ``{{name}}``. The name and every parameter are converted with ``str``
    (the text type supplied by the compat module).
    """
    name = str(self.name)
    if self.params:
        params = "|".join([str(param) for param in self.params])
        return "{{" + name + "|" + params + "}}"
    return "{{" + name + "}}"
def __iternodes__(self, getter): | |||
yield None, self | |||
@@ -77,7 +79,7 @@ class Template(Node): | |||
code.replace(node, node.replace(char, replacement)) | |||
def _blank_param_value(self, value):
    """Empty *value* in place, keeping only its outer whitespace.

    The text of *value* is matched for its leading and trailing whitespace
    runs, and ``value.nodes`` is replaced by two ``Text`` nodes holding
    exactly those runs.
    """
    # ``str`` is the compat text type; the earlier ``unicode(value)`` form of
    # this statement was a stale duplicate and is dropped.
    match = re.search(r"^(\s*).*?(\s*)$", str(value), FLAGS)
    value.nodes = [Text(match.group(1)), Text(match.group(2))]
def _select_theory(self, theories): | |||
@@ -85,13 +87,13 @@ class Template(Node): | |||
best = max(theories.values()) | |||
confidence = float(best) / sum(theories.values()) | |||
if confidence > 0.75: | |||
return theories.keys()[theories.values().index(best)] | |||
return tuple(theories.keys())[tuple(theories.values()).index(best)] | |||
def _get_spacing_conventions(self): | |||
before_theories = defaultdict(lambda: 0) | |||
after_theories = defaultdict(lambda: 0) | |||
for param in self.params: | |||
match = re.search(r"^(\s*).*?(\s*)$", unicode(param.value), FLAGS) | |||
match = re.search(r"^(\s*).*?(\s*)$", str(param.value), FLAGS) | |||
before, after = match.group(1), match.group(2) | |||
before_theories[before] += 1 | |||
after_theories[after] += 1 | |||
@@ -124,7 +126,7 @@ class Template(Node): | |||
return self._params | |||
def has_param(self, name, ignore_empty=True): | |||
name = name.strip() if isinstance(name, basestring) else unicode(name) | |||
name = name.strip() if isinstance(name, basestring) else str(name) | |||
for param in self.params: | |||
if param.name.strip() == name: | |||
if ignore_empty and not param.value.strip(): | |||
@@ -133,7 +135,7 @@ class Template(Node): | |||
return False | |||
def get(self, name): | |||
name = name.strip() if isinstance(name, basestring) else unicode(name) | |||
name = name.strip() if isinstance(name, basestring) else str(name) | |||
for param in reversed(self.params): | |||
if param.name.strip() == name: | |||
return param | |||
@@ -159,7 +161,7 @@ class Template(Node): | |||
if showkey is None: | |||
try: | |||
int_name = int(unicode(name)) | |||
int_name = int(str(name)) | |||
except ValueError: | |||
showkey = True | |||
else: | |||
@@ -167,7 +169,7 @@ class Template(Node): | |||
for param in self.params: | |||
if not param.showkey: | |||
if re.match(r"[1-9][0-9]*$", param.name.strip()): | |||
int_keys.add(int(unicode(param.name))) | |||
int_keys.add(int(str(param.name))) | |||
expected = min(set(range(1, len(int_keys) + 2)) - int_keys) | |||
if expected == int_name: | |||
showkey = False | |||
@@ -188,7 +190,7 @@ class Template(Node): | |||
return param | |||
def remove(self, name, keep_field=False, force_no_field=False): | |||
name = name.strip() if isinstance(name, basestring) else unicode(name) | |||
name = name.strip() if isinstance(name, basestring) else str(name) | |||
removed = False | |||
for i, param in enumerate(self.params): | |||
if param.name.strip() == name: | |||
@@ -20,7 +20,10 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import Node | |||
from ..compat import str, bytes, basestring | |||
__all__ = ["Text"] | |||
@@ -30,7 +33,7 @@ class Text(Node): | |||
self._value = value | |||
def __unicode__(self):
    """Return ``self.value`` converted to text with ``str``."""
    return str(self.value)
def __strip__(self, normalize, collapse): | |||
return self | |||
@@ -20,11 +20,14 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from . import tokens | |||
from ..nodes import Heading, HTMLEntity, Tag, Template, Text | |||
from ..nodes.extras import Attribute, Parameter | |||
from ..smart_list import SmartList | |||
from ..wikicode import Wikicode | |||
from ..compat import str, bytes | |||
__all__ = ["Builder"] | |||
@@ -62,7 +65,7 @@ class Builder(object): | |||
self._tokens.append(token) | |||
value = self._pop() | |||
if not key: | |||
key = self._wrap([Text(unicode(default))]) | |||
key = self._wrap([Text(str(default))]) | |||
return Parameter(key, value, showkey) | |||
else: | |||
self._write(self._handle_token(token)) | |||
@@ -20,13 +20,14 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import htmlentitydefs | |||
from __future__ import unicode_literals | |||
from math import log | |||
import re | |||
import string | |||
from . import contexts | |||
from . import tokens | |||
from ..compat import htmlentitydefs | |||
__all__ = ["Tokenizer"] | |||
@@ -20,6 +20,9 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from ..compat import str, bytes, v | |||
__all__ = ["Token"] | |||
class Token(object): | |||
@@ -33,7 +36,7 @@ class Token(object): | |||
args.append(key + "=" + repr(value[:97] + "...")) | |||
else: | |||
args.append(key + "=" + repr(value)) | |||
return u"{0}({1})".format(type(self).__name__, u", ".join(args)) | |||
return "{0}({1})".format(type(self).__name__, ", ".join(args)) | |||
def __eq__(self, other): | |||
if isinstance(other, type(self)): | |||
@@ -49,10 +52,15 @@ class Token(object): | |||
def __delattr__(self, key): | |||
del self._kwargs[key] | |||
def make(name):
    """Create a new ``Token`` subclass called *name* and export it.

    The name is appended to ``__all__`` and the new class is returned.
    """
    if v < 3:
        # With ``unicode_literals`` active, *name* is a unicode string; it is
        # encoded to a byte string before being used as the class name and
        # ``__all__`` entry on Python 2.
        name = name.encode("utf-8")
    __all__.append(name)
    return type(name, (Token,), {})
Text = make("Text") | |||
@@ -20,230 +20,240 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from .compat import str, bytes, v | |||
__all__ = ["StringMixIn"] | |||
def inheritdoc(method):
    """Copy onto *method* the docstring of the ``str`` method of the same name.

    If ``str`` has no attribute with that name, the docstring is set to a
    fixed notice that the feature is Python 2 only. Returns *method* itself,
    so this can be used as a decorator.
    """
    try:
        # ``__name__`` rather than ``func_name``: the latter does not exist
        # on Python 3 function objects.
        method.__doc__ = getattr(str, method.__name__).__doc__
    except AttributeError:
        method.__doc__ = "This feature is only available on Python 2."
    return method
class StringMixIn(object):
    """Give a class the read-only string interface, backed by ``__unicode__``.

    Subclasses implement :py:meth:`__unicode__`. Every comparison, container
    protocol method and string method below is computed from the text that
    method returns. The string methods take their docstrings from ``str`` via
    the ``inheritdoc`` decorator, so they carry comments, not docstrings.
    """

    if v >= 3:
        def __str__(self):
            return self.__unicode__()
    else:
        def __str__(self):
            return self.__unicode__().encode("utf8")

    def __repr__(self):
        return repr(self.__unicode__())

    def __lt__(self, other):
        if isinstance(other, StringMixIn):
            return self.__unicode__() < other.__unicode__()
        return self.__unicode__() < other

    def __le__(self, other):
        if isinstance(other, StringMixIn):
            return self.__unicode__() <= other.__unicode__()
        return self.__unicode__() <= other

    def __eq__(self, other):
        if isinstance(other, StringMixIn):
            return self.__unicode__() == other.__unicode__()
        return self.__unicode__() == other

    def __ne__(self, other):
        if isinstance(other, StringMixIn):
            return self.__unicode__() != other.__unicode__()
        return self.__unicode__() != other

    def __gt__(self, other):
        if isinstance(other, StringMixIn):
            return self.__unicode__() > other.__unicode__()
        return self.__unicode__() > other

    def __ge__(self, other):
        if isinstance(other, StringMixIn):
            return self.__unicode__() >= other.__unicode__()
        return self.__unicode__() >= other

    def __nonzero__(self):
        # Python 2 truthiness hook. Python 3 does not call it and falls back
        # to __len__, which gives the same answer (non-empty text is true).
        return bool(self.__unicode__())

    def __unicode__(self):
        """Return the object's text; must be overridden by subclasses."""
        raise NotImplementedError()

    def __len__(self):
        return len(self.__unicode__())

    def __iter__(self):
        for char in self.__unicode__():
            yield char

    def __getitem__(self, key):
        return self.__unicode__()[key]

    def __contains__(self, item):
        if isinstance(item, StringMixIn):
            # Call __unicode__ directly, as the comparison methods do; the
            # name ``unicode`` is not available on Python 3.
            return item.__unicode__() in self.__unicode__()
        return item in self.__unicode__()

    # For the wrappers below: where a parameter defaults to None here but the
    # underlying string method rejects None, the argument is omitted (or
    # mapped to the method's own default) instead of being forwarded.

    @inheritdoc
    def capitalize(self):
        return self.__unicode__().capitalize()

    @inheritdoc
    def center(self, width, fillchar=None):
        if fillchar is None:
            return self.__unicode__().center(width)
        return self.__unicode__().center(width, fillchar)

    @inheritdoc
    def count(self, sub=None, start=None, end=None):
        return self.__unicode__().count(sub, start, end)

    @inheritdoc
    def decode(self, encoding=None, errors=None):
        # Text strings only have .decode() on Python 2; on Python 3 this
        # raises AttributeError when called.
        text = self.__unicode__()
        if encoding is None and errors is None:
            return text.decode()
        if errors is None:
            return text.decode(encoding)
        if encoding is None:
            return text.decode(errors=errors)
        return text.decode(encoding, errors)

    @inheritdoc
    def encode(self, encoding=None, errors=None):
        text = self.__unicode__()
        if encoding is None and errors is None:
            return text.encode()
        if errors is None:
            return text.encode(encoding)
        if encoding is None:
            return text.encode(errors=errors)
        return text.encode(encoding, errors)

    @inheritdoc
    def endswith(self, prefix, start=None, end=None):
        return self.__unicode__().endswith(prefix, start, end)

    @inheritdoc
    def expandtabs(self, tabsize=None):
        if tabsize is None:
            return self.__unicode__().expandtabs()
        return self.__unicode__().expandtabs(tabsize)

    @inheritdoc
    def find(self, sub=None, start=None, end=None):
        return self.__unicode__().find(sub, start, end)

    @inheritdoc
    def format(self, *args, **kwargs):
        return self.__unicode__().format(*args, **kwargs)

    @inheritdoc
    def index(self, sub=None, start=None, end=None):
        return self.__unicode__().index(sub, start, end)

    @inheritdoc
    def isalnum(self):
        return self.__unicode__().isalnum()

    @inheritdoc
    def isalpha(self):
        return self.__unicode__().isalpha()

    @inheritdoc
    def isdecimal(self):
        return self.__unicode__().isdecimal()

    @inheritdoc
    def isdigit(self):
        return self.__unicode__().isdigit()

    @inheritdoc
    def islower(self):
        return self.__unicode__().islower()

    @inheritdoc
    def isnumeric(self):
        return self.__unicode__().isnumeric()

    @inheritdoc
    def isspace(self):
        return self.__unicode__().isspace()

    @inheritdoc
    def istitle(self):
        return self.__unicode__().istitle()

    @inheritdoc
    def isupper(self):
        return self.__unicode__().isupper()

    @inheritdoc
    def join(self, iterable):
        return self.__unicode__().join(iterable)

    @inheritdoc
    def ljust(self, width, fillchar=None):
        if fillchar is None:
            return self.__unicode__().ljust(width)
        return self.__unicode__().ljust(width, fillchar)

    @inheritdoc
    def lower(self):
        return self.__unicode__().lower()

    @inheritdoc
    def lstrip(self, chars=None):
        return self.__unicode__().lstrip(chars)

    @inheritdoc
    def partition(self, sep):
        return self.__unicode__().partition(sep)

    @inheritdoc
    def replace(self, old, new, count=None):
        # ``count`` is optional on the underlying method, so it is optional
        # here too; omitting it replaces every occurrence.
        if count is None:
            return self.__unicode__().replace(old, new)
        return self.__unicode__().replace(old, new, count)

    @inheritdoc
    def rfind(self, sub=None, start=None, end=None):
        return self.__unicode__().rfind(sub, start, end)

    @inheritdoc
    def rindex(self, sub=None, start=None, end=None):
        return self.__unicode__().rindex(sub, start, end)

    @inheritdoc
    def rjust(self, width, fillchar=None):
        if fillchar is None:
            return self.__unicode__().rjust(width)
        return self.__unicode__().rjust(width, fillchar)

    @inheritdoc
    def rpartition(self, sep):
        return self.__unicode__().rpartition(sep)

    @inheritdoc
    def rsplit(self, sep=None, maxsplit=None):
        if maxsplit is None:
            maxsplit = -1  # the string method's own "no limit" value
        return self.__unicode__().rsplit(sep, maxsplit)

    @inheritdoc
    def rstrip(self, chars=None):
        return self.__unicode__().rstrip(chars)

    @inheritdoc
    def split(self, sep=None, maxsplit=None):
        if maxsplit is None:
            maxsplit = -1  # the string method's own "no limit" value
        return self.__unicode__().split(sep, maxsplit)

    @inheritdoc
    def splitlines(self, keepends=None):
        if keepends is None:
            return self.__unicode__().splitlines()
        return self.__unicode__().splitlines(keepends)

    @inheritdoc
    def startswith(self, prefix, start=None, end=None):
        return self.__unicode__().startswith(prefix, start, end)

    @inheritdoc
    def strip(self, chars=None):
        return self.__unicode__().strip(chars)

    @inheritdoc
    def swapcase(self):
        return self.__unicode__().swapcase()

    @inheritdoc
    def title(self):
        return self.__unicode__().title()

    @inheritdoc
    def translate(self, table, deletechars=None):
        if deletechars is None:
            return self.__unicode__().translate(table)
        # Forwarded unchanged when given explicitly, as before.
        return self.__unicode__().translate(table, deletechars)

    @inheritdoc
    def upper(self):
        return self.__unicode__().upper()

    @inheritdoc
    def zfill(self, width):
        return self.__unicode__().zfill(width)
del inheritdoc |
@@ -20,9 +20,12 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import mwparserfromhell | |||
from .nodes import Node | |||
from .smart_list import SmartList | |||
from .compat import str, bytes, basestring | |||
def parse_anything(value): | |||
wikicode = mwparserfromhell.wikicode.Wikicode | |||
@@ -33,7 +36,7 @@ def parse_anything(value): | |||
if isinstance(value, basestring): | |||
return mwparserfromhell.parse(value) | |||
if isinstance(value, int): | |||
return mwparserfromhell.parse(unicode(value)) | |||
return mwparserfromhell.parse(str(value)) | |||
if value is None: | |||
return wikicode(SmartList()) | |||
try: | |||
@@ -20,12 +20,14 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import re | |||
import sys | |||
from .nodes import Heading, Node, Tag, Template, Text | |||
from .string_mixin import StringMixIn | |||
from .utils import parse_anything | |||
from .compat import str, bytes | |||
__all__ = ["Wikicode"] | |||
@@ -37,7 +39,7 @@ class Wikicode(StringMixIn): | |||
self._nodes = nodes | |||
def __unicode__(self):
    """Return the text of every node in ``self.nodes``, concatenated in order."""
    return "".join([str(node) for node in self.nodes])
def _get_children(self, node): | |||
for context, child in node.__iternodes__(self._get_all_nodes): | |||
@@ -171,7 +173,7 @@ class Wikicode(StringMixIn): | |||
nodes = self.nodes | |||
for node in nodes: | |||
if not forcetype or isinstance(node, forcetype): | |||
if not matches or re.search(matches, unicode(node), flags): | |||
if not matches or re.search(matches, str(node), flags): | |||
yield node | |||
def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): | |||
@@ -229,15 +231,15 @@ class Wikicode(StringMixIn): | |||
for node in self.nodes: | |||
stripped = node.__strip__(normalize, collapse) | |||
if stripped: | |||
nodes.append(unicode(stripped)) | |||
nodes.append(str(stripped)) | |||
if collapse: | |||
stripped = u"".join(nodes).strip("\n") | |||
stripped = "".join(nodes).strip("\n") | |||
while "\n\n\n" in stripped: | |||
stripped = stripped.replace("\n\n\n", "\n\n") | |||
return stripped | |||
else: | |||
return u"".join(nodes) | |||
return "".join(nodes) | |||
def get_tree(self): | |||
marker = object() # Random object we can find with certainty in a list | |||