diff --git a/docs/api/mwparserfromhell.nodes.rst b/docs/api/mwparserfromhell.nodes.rst index 2cbaa1c..38058f2 100644 --- a/docs/api/mwparserfromhell.nodes.rst +++ b/docs/api/mwparserfromhell.nodes.rst @@ -9,6 +9,14 @@ nodes Package .. autoclass:: mwparserfromhell.nodes.Node :special-members: +:mod:`_base` Module +---------------------- + +.. automodule:: mwparserfromhell.nodes._base + :members: + :undoc-members: + :show-inheritance: + :mod:`argument` Module ---------------------- diff --git a/docs/api/mwparserfromhell.parser.rst b/docs/api/mwparserfromhell.parser.rst index c7c8639..72ee9eb 100644 --- a/docs/api/mwparserfromhell.parser.rst +++ b/docs/api/mwparserfromhell.parser.rst @@ -23,6 +23,13 @@ parser Package :members: :undoc-members: +:mod:`errors` Module +-------------------- + +.. automodule:: mwparserfromhell.parser.errors + :members: + :undoc-members: + :mod:`tokenizer` Module ----------------------- diff --git a/docs/api/mwparserfromhell.rst b/docs/api/mwparserfromhell.rst index 63af111..c0bdc88 100644 --- a/docs/api/mwparserfromhell.rst +++ b/docs/api/mwparserfromhell.rst @@ -8,27 +8,12 @@ mwparserfromhell Package :members: :undoc-members: -:mod:`compat` Module --------------------- - -.. automodule:: mwparserfromhell.compat - :members: - :undoc-members: - :mod:`definitions` Module ------------------------- .. automodule:: mwparserfromhell.definitions :members: -:mod:`smart_list` Module ------------------------- - -.. 
automodule:: mwparserfromhell.smart_list - :members: SmartList, _ListProxy - :undoc-members: - :show-inheritance: - :mod:`string_mixin` Module -------------------------- @@ -58,3 +43,4 @@ Subpackages mwparserfromhell.nodes mwparserfromhell.parser + mwparserfromhell.smart_list diff --git a/docs/api/mwparserfromhell.smart_list.rst b/docs/api/mwparserfromhell.smart_list.rst new file mode 100644 index 0000000..9312374 --- /dev/null +++ b/docs/api/mwparserfromhell.smart_list.rst @@ -0,0 +1,30 @@ +smart_list Package +================== + +:mod:`smart_list` Package +------------------------- + +.. automodule:: mwparserfromhell.smart_list + :members: + :undoc-members: + +:mod:`list_proxy` Module +------------------------ + +.. automodule:: mwparserfromhell.smart_list.list_proxy + :members: + :undoc-members: + +:mod:`smart_list` Module +------------------------ + +.. automodule:: mwparserfromhell.smart_list.smart_list + :members: + :undoc-members: + +:mod:`utils` Module +--------------------- + +.. automodule:: mwparserfromhell.smart_list.utils + :members: + :undoc-members: diff --git a/docs/conf.py b/docs/conf.py index 9666cd0..9946f3b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. 
project = u'mwparserfromhell' -copyright = u'2012–2019 Ben Kurtovic' +copyright = u'2012–2020 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 6056b83..609999b 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,7 +26,7 @@ outrageously powerful parser for `MediaWiki `_ wikico """ __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012-2019 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012-2020 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.6.dev0" __email__ = "ben.kurtovic@gmail.com" diff --git a/mwparserfromhell/definitions.py b/mwparserfromhell/definitions.py index 0e70cc1..bd0e969 100644 --- a/mwparserfromhell/definitions.py +++ b/mwparserfromhell/definitions.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 6aa6ea4..4c29a5b 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,42 +28,8 @@ the name of a :class:`.Template` is a :class:`.Wikicode` object that can 
contain text or more templates. """ - -from ..string_mixin import StringMixIn - -__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", - "Node", "Tag", "Template", "Text", "Wikilink"] - -class Node(StringMixIn): - """Represents the base Node type, demonstrating the methods to override. - - :meth:`__unicode__` must be overridden. It should return a ``unicode`` or - (``str`` in py3k) representation of the node. If the node contains - :class:`.Wikicode` objects inside of it, :meth:`__children__` should be a - generator that iterates over them. If the node is printable - (shown when the page is rendered), :meth:`__strip__` should return its - printable version, stripping out any formatting marks. It does not have to - return a string, but something that can be converted to a string with - ``str()``. Finally, :meth:`__showtree__` can be overridden to build a - nice tree representation of the node, if desired, for - :meth:`~.Wikicode.get_tree`. - """ - def __unicode__(self): - raise NotImplementedError() - - def __children__(self): - return - # pylint: disable=unreachable - yield # pragma: no cover (this is a generator that yields nothing) - - def __strip__(self, **kwargs): - return None - - def __showtree__(self, write, get, mark): - write(str(self)) - - from . 
import extras +from ._base import Node from .text import Text from .argument import Argument from .comment import Comment @@ -73,3 +39,6 @@ from .html_entity import HTMLEntity from .tag import Tag from .template import Template from .wikilink import Wikilink + +__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", + "Node", "Tag", "Template", "Text", "Wikilink"] diff --git a/mwparserfromhell/nodes/_base.py b/mwparserfromhell/nodes/_base.py new file mode 100644 index 0000000..e4a3c2e --- /dev/null +++ b/mwparserfromhell/nodes/_base.py @@ -0,0 +1,51 @@ +# +# Copyright (C) 2012-2020 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from ..string_mixin import StringMixIn + +__all__ = ["Node"] + +class Node(StringMixIn): + """Represents the base Node type, demonstrating the methods to override. + + :meth:`__str__` must be overridden. It should return a ``str`` + representation of the node. 
If the node contains :class:`.Wikicode` + objects inside of it, :meth:`__children__` should be a generator that + iterates over them. If the node is printable (shown when the page is + rendered), :meth:`__strip__` should return its printable version, + stripping out any formatting marks. It does not have to return a string, + but something that can be converted to a string with ``str()``. Finally, + :meth:`__showtree__` can be overridden to build a nice tree representation + of the node, if desired, for :meth:`~.Wikicode.get_tree`. + """ + def __str__(self): + raise NotImplementedError() + + def __children__(self): + return + # pylint: disable=unreachable + yield # pragma: no cover (this is a generator that yields nothing) + + def __strip__(self, **kwargs): + return None + + def __showtree__(self, write, get, mark): + write(str(self)) diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 4d9d613..a852a65 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . 
import Node +from ._base import Node from ..utils import parse_anything __all__ = ["Argument"] @@ -33,7 +33,7 @@ class Argument(Node): self.name = name self.default = default - def __unicode__(self): + def __str__(self): start = "{{{" + str(self.name) if self.default is not None: return start + "|" + str(self.default) + "}}}" diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 302699e..56b05b7 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node __all__ = ["Comment"] @@ -31,7 +31,7 @@ class Comment(Node): super().__init__() self.contents = contents - def __unicode__(self): + def __str__(self): return "" @property diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 4dc3594..ba86659 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . 
import Node +from ._base import Node from ..utils import parse_anything __all__ = ["ExternalLink"] @@ -34,7 +34,7 @@ class ExternalLink(Node): self.title = title self.brackets = brackets - def __unicode__(self): + def __str__(self): if self.brackets: if self.title is not None: return "[" + str(self.url) + " " + str(self.title) + "]" @@ -79,6 +79,7 @@ class ExternalLink(Node): @url.setter def url(self, value): + # pylint: disable=import-outside-toplevel from ..parser import contexts self._url = parse_anything(value, contexts.EXT_LINK_URI) diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 38d2423..442c3ac 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -44,7 +44,7 @@ class Attribute(StringMixIn): self.pad_before_eq = pad_before_eq self.pad_after_eq = pad_after_eq - def __unicode__(self): + def __str__(self): result = self.pad_first + str(self.name) + self.pad_before_eq if self.value is not None: result += "=" + self.pad_after_eq diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 4478084..9287e00 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -41,7 +41,7 @@ class Parameter(StringMixIn): self.value = value self.showkey = showkey - def __unicode__(self): + def __str__(self): if self.showkey: return str(self.name) + "=" + 
str(self.value) return str(self.value) diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 1fe8790..de4dc70 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node from ..utils import parse_anything __all__ = ["Heading"] @@ -33,7 +33,7 @@ class Heading(Node): self.title = title self.level = level - def __unicode__(self): + def __str__(self): return ("=" * self.level) + str(self.title) + ("=" * self.level) def __children__(self): diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 8a2eef4..7371f2e 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,7 @@ import html.entities as htmlentities -from . 
import Node +from ._base import Node __all__ = ["HTMLEntity"] @@ -49,7 +49,7 @@ class HTMLEntity(Node): self._hexadecimal = hexadecimal self._hex_char = hex_char - def __unicode__(self): + def __str__(self): if self.named: return "&{};".format(self.value) if self.hexadecimal: @@ -98,21 +98,22 @@ class HTMLEntity(Node): int(newval) except ValueError: try: - int(newval, 16) + intval = int(newval, 16) except ValueError: if newval not in htmlentities.entitydefs: - raise ValueError("entity value is not a valid name") + raise ValueError(f"entity value {newval!r} is not a valid name") from None self._named = True self._hexadecimal = False else: - if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: - raise ValueError("entity value is not in range(0x110000)") + if intval < 0 or intval > 0x10FFFF: + raise ValueError( + f"entity value 0x{intval:x} is not in range(0x110000)") from None self._named = False self._hexadecimal = True else: test = int(newval, 16 if self.hexadecimal else 10) if test < 0 or test > 0x10FFFF: - raise ValueError("entity value is not in range(0x110000)") + raise ValueError(f"entity value {test} is not in range(0x110000)") self._named = False self._value = newval @@ -120,13 +121,13 @@ class HTMLEntity(Node): def named(self, newval): newval = bool(newval) if newval and self.value not in htmlentities.entitydefs: - raise ValueError("entity value is not a valid name") + raise ValueError(f"entity value {self.value!r} is not a valid name") if not newval: try: int(self.value, 16) - except ValueError: - err = "current entity value is not a valid Unicode codepoint" - raise ValueError(err) + except ValueError as exc: + raise ValueError(f"current entity value {self.value!r} " + f"is not a valid Unicode codepoint") from exc self._named = newval @hexadecimal.setter diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 9fa45c5..094853b 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,5 +1,5 @@ # -# 
Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node from .extras import Attribute from ..definitions import is_visible from ..utils import parse_anything @@ -50,7 +50,7 @@ class Tag(Node): if closing_wiki_markup is not None: self.closing_wiki_markup = closing_wiki_markup - def __unicode__(self): + def __str__(self): if self.wiki_markup: if self.attributes: attrs = "".join([str(attr) for attr in self.attributes]) @@ -60,10 +60,9 @@ class Tag(Node): separator = self.wiki_style_separator or "" if self.self_closing: return self.wiki_markup + attrs + padding + separator - else: - close = self.closing_wiki_markup or "" - return self.wiki_markup + attrs + padding + separator + \ - str(self.contents) + close + close = self.closing_wiki_markup or "" + return self.wiki_markup + attrs + padding + separator + \ + str(self.contents) + close result = (" +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,9 @@ from collections import defaultdict import re -from . 
import HTMLEntity, Node, Text +from ._base import Node +from .html_entity import HTMLEntity +from .text import Text from .extras import Parameter from ..utils import parse_anything @@ -43,12 +45,11 @@ class Template(Node): else: self._params = [] - def __unicode__(self): + def __str__(self): if self.params: params = "|".join([str(param) for param in self.params]) return "{{" + str(self.name) + "|" + params + "}}" - else: - return "{{" + str(self.name) + "}}" + return "{{" + str(self.name) + "}}" def __children__(self): yield self.name @@ -102,6 +103,7 @@ class Template(Node): confidence = float(best) / sum(values) if confidence > 0.5: return tuple(theories.keys())[values.index(best)] + return None @staticmethod def _blank_param_value(value): @@ -229,8 +231,7 @@ class Template(Node): return param if default is _UNSET: raise ValueError(name) - else: - return default + return default def __getitem__(self, name): return self.get(name) @@ -339,19 +340,20 @@ class Template(Node): hidden name, if it exists, or the first instance). 
""" if isinstance(param, Parameter): - return self._remove_exact(param, keep_field) + self._remove_exact(param, keep_field) + return name = str(param).strip() removed = False to_remove = [] - for i, param in enumerate(self.params): - if param.name.strip() == name: + for i, par in enumerate(self.params): + if par.name.strip() == name: if keep_field: if self._should_remove(i, name): to_remove.append(i) else: - self._blank_param_value(param.value) + self._blank_param_value(par.value) keep_field = False else: self._fix_dependendent_params(i) diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index b07eedc..cce670c 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . import Node +from ._base import Node __all__ = ["Text"] @@ -31,7 +31,7 @@ class Text(Node): super().__init__() self.value = value - def __unicode__(self): + def __str__(self): return self.value def __strip__(self, **kwargs): diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 98ae75f..fc78833 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,7 @@ # SOFTWARE. -from . 
import Node +from ._base import Node from ..utils import parse_anything __all__ = ["Wikilink"] @@ -33,7 +33,7 @@ class Wikilink(Node): self.title = title self.text = text - def __unicode__(self): + def __str__(self): if self.text is not None: return "[[" + str(self.title) + "|" + str(self.text) + "]]" return "[[" + str(self.title) + "]]" diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index fb1bf20..cde45c5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,20 +25,8 @@ modules: the :mod:`.tokenizer` and the :mod:`.builder`. This module joins them together into one interface. """ -class ParserError(Exception): - """Exception raised when an internal error occurs while parsing. - - This does not mean that the wikicode was invalid, because invalid markup - should still be parsed correctly. This means that the parser caught itself - with an impossible internal state and is bailing out before other problems - can happen. Its appearance indicates a bug. - """ - def __init__(self, extra): - msg = "This is a bug and should be reported. 
Info: {}.".format(extra) - super().__init__(msg) - - from .builder import Builder +from .errors import ParserError try: from ._tokenizer import CTokenizer use_c = True diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 1ae2150..4c14b2a 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,7 +20,8 @@ # SOFTWARE. -from . import tokens, ParserError +from . import tokens +from .errors import ParserError from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from ..nodes.extras import Attribute, Parameter @@ -198,8 +199,7 @@ class Builder: if isinstance(token, tokens.HeadingEnd): title = self._pop() return Heading(title, level) - else: - self._write(self._handle_token(token)) + self._write(self._handle_token(token)) raise ParserError("_handle_heading() missed a close token") @_add_handler(tokens.CommentStart) @@ -211,8 +211,7 @@ class Builder: if isinstance(token, tokens.CommentEnd): contents = self._pop() return Comment(contents) - else: - self._write(self._handle_token(token)) + self._write(self._handle_token(token)) raise ParserError("_handle_comment() missed a close token") def _handle_attribute(self, start): @@ -283,7 +282,7 @@ class Builder: return _HANDLERS[type(token)](self, token) except KeyError: err = "_handle_token() got unexpected {0}" - raise ParserError(err.format(type(token).__name__)) + raise ParserError(err.format(type(token).__name__)) from None def build(self, tokenlist): """Build a Wikicode object from a list tokens and return it.""" diff --git a/mwparserfromhell/parser/ctokenizer/definitions.c b/mwparserfromhell/parser/ctokenizer/definitions.c index 
b1ff278..323d8a1 100644 --- a/mwparserfromhell/parser/ctokenizer/definitions.c +++ b/mwparserfromhell/parser/ctokenizer/definitions.c @@ -1,5 +1,5 @@ /* -Copyright (C) 2012-2016 Ben Kurtovic +Copyright (C) 2012-2020 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/errors.py b/mwparserfromhell/parser/errors.py new file mode 100644 index 0000000..adf3d5d --- /dev/null +++ b/mwparserfromhell/parser/errors.py @@ -0,0 +1,34 @@ +# +# Copyright (C) 2012-2020 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +__all__ = ["ParserError"] + +class ParserError(Exception): + """Exception raised when an internal error occurs while parsing. + + This does not mean that the wikicode was invalid, because invalid markup + should still be parsed correctly. 
This means that the parser caught itself + with an impossible internal state and is bailing out before other problems + can happen. Its appearance indicates a bug. + """ + def __init__(self, extra): + msg = "This is a bug and should be reported. Info: {}.".format(extra) + super().__init__(msg) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a95c477..93b79d9 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,8 @@ import html.entities as htmlentities from math import log import re -from . import contexts, tokens, ParserError +from . import contexts, tokens +from .errors import ParserError from ..definitions import (get_html_tag, is_parsable, is_single, is_single_only, is_scheme) @@ -323,7 +324,7 @@ class Tokenizer: self._head += 2 try: # If the wikilink looks like an external link, parse it as such: - link, extra, delta = self._really_parse_external_link(True) + link, _extra, _delta = self._really_parse_external_link(True) except BadRoute: self._head = reset + 1 try: @@ -433,17 +434,17 @@ class Tokenizer: self._emit_text(this) return punct, tail - def _is_free_link_end(self, this, next): + def _is_free_link_end(self, this, nxt): """Return whether the current head is the end of a free link.""" # Built from _parse()'s end sentinels: after, ctx = self._read(2), self._context equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING return (this in (self.END, "\n", "[", "]", "<", ">") or - this == next == "'" or + this == nxt == "'" or (this == "|" and ctx & contexts.TEMPLATE) or (this == "=" and ctx & equal_sign_contexts) or - (this == next == "}" and ctx & contexts.TEMPLATE) or - (this == next == after 
== "}" and ctx & contexts.ARGUMENT)) + (this == nxt == "}" and ctx & contexts.TEMPLATE) or + (this == nxt == after == "}" and ctx & contexts.ARGUMENT)) def _really_parse_external_link(self, brackets): """Really parse an external link.""" @@ -458,23 +459,23 @@ class Tokenizer: self._fail_route() tail = "" while True: - this, next = self._read(), self._read(1) + this, nxt = self._read(), self._read(1) if this == "&": if tail: self._emit_text(tail) tail = "" self._parse_entity() - elif (this == "<" and next == "!" and self._read(2) == + elif (this == "<" and nxt == "!" and self._read(2) == self._read(3) == "-"): if tail: self._emit_text(tail) tail = "" self._parse_comment() - elif not brackets and self._is_free_link_end(this, next): + elif not brackets and self._is_free_link_end(this, nxt): return self._pop(), tail, -1 elif this is self.END or this == "\n": self._fail_route() - elif this == next == "{" and self._can_recurse(): + elif this == nxt == "{" and self._can_recurse(): if tail: self._emit_text(tail) tail = "" @@ -702,12 +703,12 @@ class Tokenizer: def _handle_tag_text(self, text): """Handle regular *text* inside of an HTML open tag.""" - next = self._read(1) + nxt = self._read(1) if not self._can_recurse() or text not in self.MARKERS: self._emit_text(text) - elif text == next == "{": + elif text == nxt == "{": self._parse_template_or_argument() - elif text == next == "[": + elif text == nxt == "[": self._parse_wikilink() elif text == "<": self._parse_tag() @@ -796,10 +797,10 @@ class Tokenizer: """Handle the body of an HTML tag that is parser-blacklisted.""" strip = lambda text: text.rstrip().lower() while True: - this, next = self._read(), self._read(1) + this, nxt = self._read(), self._read(1) if this is self.END: self._fail_route() - elif this == "<" and next == "/": + elif this == "<" and nxt == "/": self._head += 3 if self._read() != ">" or (strip(self._read(-1)) != strip(self._stack[1].text)): @@ -854,7 +855,7 @@ class Tokenizer: 
self._push(contexts.TAG_OPEN) self._emit(tokens.TagOpenOpen()) while True: - this, next = self._read(), self._read(1) + this, nxt = self._read(), self._read(1) can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or data.context & data.CX_NOTE_SPACE) if this is self.END: @@ -876,7 +877,7 @@ class Tokenizer: if is_parsable(self._stack[1].text): return self._parse(push=False) return self._handle_blacklisted_tag() - elif this == "/" and next == ">" and can_exit: + elif this == "/" and nxt == ">" and can_exit: self._handle_tag_close_open(data, tokens.TagCloseSelfclose) return self._pop() else: @@ -933,9 +934,11 @@ class Tokenizer: stack = self._parse(new_ctx) except BadRoute: self._head = reset - return self._emit_text("''") + self._emit_text("''") + return else: - return self._emit_text("''") + self._emit_text("''") + return self._emit_style_tag("i", "''", stack) def _parse_bold(self): @@ -948,7 +951,7 @@ class Tokenizer: if self._context & contexts.STYLE_SECOND_PASS: self._emit_text("'") return True - elif self._context & contexts.STYLE_ITALICS: + if self._context & contexts.STYLE_ITALICS: self._context |= contexts.STYLE_PASS_AGAIN self._emit_text("'''") else: @@ -956,6 +959,7 @@ class Tokenizer: self._parse_italics() else: self._emit_style_tag("b", "'''", stack) + return False def _parse_italics_and_bold(self): """Parse wiki-style italics and bold together (i.e., five ticks).""" @@ -1017,7 +1021,7 @@ class Tokenizer: if ticks == 5: self._head -= 3 if italics else 2 return self._pop() - elif not self._can_recurse(): + if not self._can_recurse(): if ticks == 3: if self._context & contexts.STYLE_SECOND_PASS: self._emit_text("'") @@ -1101,7 +1105,7 @@ class Tokenizer: if this.isspace(): data.padding_buffer["first"] += this return data.padding_buffer["first"] - elif this is self.END or this == end_token: + if this is self.END or this == end_token: if self._context & contexts.TAG_ATTR: if data.context & data.CX_QUOTED: # Unclosed attribute quote: reset, don't 
die @@ -1241,9 +1245,9 @@ class Tokenizer: if context & contexts.FAIL_NEXT: return False if context & contexts.WIKILINK_TITLE: - if this == "]" or this == "{": + if this in ("]", "{"): self._context |= contexts.FAIL_NEXT - elif this == "\n" or this == "[" or this == "}" or this == ">": + elif this in ("\n", "[", "}", ">"): return False elif this == "<": if self._read(1) == "!": @@ -1251,16 +1255,16 @@ class Tokenizer: else: return False return True - elif context & contexts.EXT_LINK_TITLE: + if context & contexts.EXT_LINK_TITLE: return this != "\n" - elif context & contexts.TEMPLATE_NAME: + if context & contexts.TEMPLATE_NAME: if this == "{": self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT return True if this == "}" or (this == "<" and self._read(1) == "!"): self._context |= contexts.FAIL_NEXT return True - if this == "[" or this == "]" or this == "<" or this == ">": + if this in ("[", "]", "<", ">"): return False if this == "|": return True @@ -1273,30 +1277,29 @@ class Tokenizer: elif this is self.END or not this.isspace(): self._context |= contexts.HAS_TEXT return True - elif context & contexts.TAG_CLOSE: + if context & contexts.TAG_CLOSE: return this != "<" - else: - if context & contexts.FAIL_ON_EQUALS: - if this == "=": - return False - elif context & contexts.FAIL_ON_LBRACE: - if this == "{" or (self._read(-1) == self._read(-2) == "{"): - if context & contexts.TEMPLATE: - self._context |= contexts.FAIL_ON_EQUALS - else: - self._context |= contexts.FAIL_NEXT - return True - self._context ^= contexts.FAIL_ON_LBRACE - elif context & contexts.FAIL_ON_RBRACE: - if this == "}": + if context & contexts.FAIL_ON_EQUALS: + if this == "=": + return False + elif context & contexts.FAIL_ON_LBRACE: + if this == "{" or (self._read(-1) == self._read(-2) == "{"): + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: self._context |= contexts.FAIL_NEXT - return True - self._context ^= contexts.FAIL_ON_RBRACE - elif this == "{": - 
self._context |= contexts.FAIL_ON_LBRACE - elif this == "}": - self._context |= contexts.FAIL_ON_RBRACE - return True + return True + self._context ^= contexts.FAIL_ON_LBRACE + elif context & contexts.FAIL_ON_RBRACE: + if this == "}": + self._context |= contexts.FAIL_NEXT + return True + self._context ^= contexts.FAIL_ON_RBRACE + elif this == "{": + self._context |= contexts.FAIL_ON_LBRACE + elif this == "}": + self._context |= contexts.FAIL_ON_RBRACE + return True def _parse(self, context=0, push=True): """Parse the wikicode string, using *context* for when to stop.""" @@ -1315,8 +1318,8 @@ class Tokenizer: continue if this is self.END: return self._handle_end() - next = self._read(1) - if this == next == "{": + nxt = self._read(1) + if this == nxt == "{": if self._can_recurse(): self._parse_template_or_argument() else: @@ -1325,23 +1328,22 @@ class Tokenizer: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() - elif this == next == "}" and self._context & contexts.TEMPLATE: + elif this == nxt == "}" and self._context & contexts.TEMPLATE: return self._handle_template_end() elif this == "|" and self._context & contexts.ARGUMENT_NAME: self._handle_argument_separator() - elif this == next == "}" and self._context & contexts.ARGUMENT: + elif this == nxt == "}" and self._context & contexts.ARGUMENT: if self._read(2) == "}": return self._handle_argument_end() - else: - self._emit_text("}") - elif this == next == "[" and self._can_recurse(): + self._emit_text("}") + elif this == nxt == "[" and self._can_recurse(): if not self._context & contexts.NO_WIKILINKS: self._parse_wikilink() else: self._emit_text("[") elif this == "|" and self._context & contexts.WIKILINK_TITLE: self._handle_wikilink_separator() - elif this == next == "]" and self._context & contexts.WIKILINK: + elif this == nxt == "]" and self._context & contexts.WIKILINK: return self._handle_wikilink_end() elif this == "[": 
self._parse_external_link(True) @@ -1360,12 +1362,12 @@ class Tokenizer: self._fail_route() elif this == "&": self._parse_entity() - elif this == "<" and next == "!": + elif this == "<" and nxt == "!": if self._read(2) == self._read(3) == "-": self._parse_comment() else: self._emit_text(this) - elif this == "<" and next == "/" and self._read(2) is not self.END: + elif this == "<" and nxt == "/" and self._read(2) is not self.END: if self._context & contexts.TAG_BODY: self._handle_tag_open_close() else: @@ -1377,14 +1379,14 @@ class Tokenizer: self._emit_text("<") elif this == ">" and self._context & contexts.TAG_CLOSE: return self._handle_tag_close_close() - elif this == next == "'" and not self._skip_style_tags: + elif this == nxt == "'" and not self._skip_style_tags: result = self._parse_style() if result is not None: return result elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"): self._handle_list() elif self._read(-1) in ("\n", self.START) and ( - this == next == self._read(2) == self._read(3) == "-"): + this == nxt == self._read(2) == self._read(3) == "-"): self._handle_hr() elif this in ("\n", ":") and self._context & contexts.DL_TERM: self._handle_dl_term() @@ -1392,7 +1394,7 @@ class Tokenizer: # Kill potential table contexts self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS # Start of table parsing - elif this == "{" and next == "|" and ( + elif this == "{" and nxt == "|" and ( self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): if self._can_recurse(): @@ -1400,15 +1402,15 @@ class Tokenizer: else: self._emit_text("{") elif self._context & contexts.TABLE_OPEN: - if this == next == "|" and self._context & contexts.TABLE_TD_LINE: + if this == nxt == "|" and self._context & contexts.TABLE_TD_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE) - elif this == next == "|" and 
self._context & contexts.TABLE_TH_LINE: + elif this == nxt == "|" and self._context & contexts.TABLE_TH_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE) - elif this == next == "!" and self._context & contexts.TABLE_TH_LINE: + elif this == nxt == "!" and self._context & contexts.TABLE_TH_LINE: if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE) @@ -1420,13 +1422,13 @@ class Tokenizer: self._emit_text(this) elif (self._read(-1) in ("\n", self.START) or (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): - if this == "|" and next == "}": + if this == "|" and nxt == "}": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() if self._context & contexts.TABLE_ROW_OPEN: return self._handle_table_row_end() return self._handle_table_end() - elif this == "|" and next == "-": + if this == "|" and nxt == "-": if self._context & contexts.TABLE_CELL_OPEN: return self._handle_table_cell_end() if self._context & contexts.TABLE_ROW_OPEN: @@ -1458,10 +1460,10 @@ class Tokenizer: self._skip_style_tags = skip_style_tags try: - tokens = self._parse(context) - except BadRoute: # pragma: no cover (untestable/exceptional case) - raise ParserError("Python tokenizer exited with BadRoute") + result = self._parse(context) + except BadRoute as exc: # pragma: no cover (untestable/exceptional case) + raise ParserError("Python tokenizer exited with BadRoute") from exc if self._stacks: # pragma: no cover (untestable/exceptional case) err = "Python tokenizer exited with non-empty token stack" raise ParserError(err) - return tokens + return result diff --git a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index ec99c67..257ed89 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,5 +1,5 @@ # -# Copyright 
(C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ a syntactically valid form by the :class:`.Tokenizer`, and then converted into the :class`.Wikicode` tree by the :class:`.Builder`. """ - __all__ = ["Token"] class Token(dict): diff --git a/mwparserfromhell/smart_list/__init__.py b/mwparserfromhell/smart_list/__init__.py index fdf7bd8..723d992 100644 --- a/mwparserfromhell/smart_list/__init__.py +++ b/mwparserfromhell/smart_list/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -22,8 +22,9 @@ """ This module contains the :class:`.SmartList` type, as well as its -:class:`._ListProxy` child, which together implement a list whose sublists +:class:`.ListProxy` child, which together implement a list whose sublists reflect changes made to the main list, and vice-versa. """ -from .SmartList import SmartList +from .list_proxy import ListProxy as _ListProxy +from .smart_list import SmartList diff --git a/mwparserfromhell/smart_list/ListProxy.py b/mwparserfromhell/smart_list/list_proxy.py similarity index 89% rename from mwparserfromhell/smart_list/ListProxy.py rename to mwparserfromhell/smart_list/list_proxy.py index 35b45dc..f1525fc 100644 --- a/mwparserfromhell/smart_list/ListProxy.py +++ b/mwparserfromhell/smart_list/list_proxy.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -20,12 +20,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-# SmartList has to be a full import in order to avoid cyclical import errors -import mwparserfromhell.smart_list.SmartList from .utils import _SliceNormalizerMixIn, inheritdoc -class _ListProxy(_SliceNormalizerMixIn, list): +class ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :class:`.SmartList` object when slicing. It does not @@ -42,32 +40,32 @@ class _ListProxy(_SliceNormalizerMixIn, list): return repr(self._render()) def __lt__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() < list(other) return self._render() < other def __le__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() <= list(other) return self._render() <= other def __eq__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() == list(other) return self._render() == other def __ne__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() != list(other) return self._render() != other def __gt__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() > list(other) return self._render() > other def __ge__(self, other): - if isinstance(other, _ListProxy): + if isinstance(other, ListProxy): return self._render() >= list(other) return self._render() >= other @@ -84,8 +82,7 @@ class _ListProxy(_SliceNormalizerMixIn, list): keystop = min(self._start + key.stop, self._stop) adjusted = slice(keystart, keystop, key.step) return self._parent[adjusted] - else: - return self._render()[key] + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): @@ -133,20 +130,20 @@ class _ListProxy(_SliceNormalizerMixIn, list): return item in self._render() def __add__(self, other): - return mwparserfromhell.smart_list.SmartList(list(self) + 
other) + return type(self._parent)(list(self) + other) def __radd__(self, other): - return mwparserfromhell.smart_list.SmartList(other + list(self)) + return type(self._parent)(other + list(self)) def __iadd__(self, other): self.extend(other) return self def __mul__(self, other): - return mwparserfromhell.smart_list.SmartList(list(self) * other) + return type(self._parent)(list(self) * other) def __rmul__(self, other): - return mwparserfromhell.smart_list.SmartList(other * list(self)) + return type(self._parent)(other * list(self)) def __imul__(self, other): self.extend(list(self) * (other - 1)) diff --git a/mwparserfromhell/smart_list/SmartList.py b/mwparserfromhell/smart_list/smart_list.py similarity index 91% rename from mwparserfromhell/smart_list/SmartList.py rename to mwparserfromhell/smart_list/smart_list.py index c2e83a4..f83e181 100644 --- a/mwparserfromhell/smart_list/SmartList.py +++ b/mwparserfromhell/smart_list/smart_list.py @@ -1,4 +1,5 @@ -# Copyright (C) 2012-2016 Ben Kurtovic +# +# Copyright (C) 2012-2020 Ben Kurtovic # Copyright (C) 2019-2020 Yuri Astrakhan # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -19,9 +20,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from _weakref import ref +from weakref import ref -from .ListProxy import _ListProxy +from .list_proxy import ListProxy from .utils import _SliceNormalizerMixIn, inheritdoc @@ -32,7 +33,7 @@ class SmartList(_SliceNormalizerMixIn, list): list (such as the addition, removal, or replacement of elements) will be reflected in the sublist, or vice-versa, to the greatest degree possible. This is implemented by having sublists - instances of the - :class:`._ListProxy` type - dynamically determine their elements by storing + :class:`.ListProxy` type - dynamically determine their elements by storing their slice info and retrieving that slice from the parent. 
Methods that change the size of the list also change the slice info. For example:: @@ -61,21 +62,22 @@ class SmartList(_SliceNormalizerMixIn, list): return super().__getitem__(key) key = self._normalize_slice(key, clamp=False) sliceinfo = [key.start, key.stop, key.step] - child = _ListProxy(self, sliceinfo) + child = ListProxy(self, sliceinfo) child_ref = ref(child, self._delete_child) self._children[id(child_ref)] = (child_ref, sliceinfo) return child def __setitem__(self, key, item): if not isinstance(key, slice): - return super().__setitem__(key, item) + super().__setitem__(key, item) + return item = list(item) super().__setitem__(key, item) key = self._normalize_slice(key, clamp=True) diff = len(item) + (key.start - key.stop) // key.step if not diff: return - for child, (start, stop, step) in self._children.values(): + for child, (start, stop, _step) in self._children.values(): if start > key.stop: self._children[id(child)][1][0] += diff if stop is not None and stop >= key.stop: @@ -88,7 +90,7 @@ class SmartList(_SliceNormalizerMixIn, list): else: key = slice(key, key + 1, 1) diff = (key.stop - key.start) // key.step - for child, (start, stop, step) in self._children.values(): + for child, (start, stop, _step) in self._children.values(): if start > key.start: self._children[id(child)][1][0] -= diff if stop is not None and stop >= key.stop: diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 564706d..f39cce7 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -38,67 +38,64 @@ def inheritdoc(method): return method class StringMixIn: - """Implement the interface for ``unicode``/``str`` in a dynamic manner. 
+ """Implement the interface for ``str`` in a dynamic manner. - To use this class, inherit from it and override the :meth:`__unicode__` - method to return the string representation of the object. - The various string methods will operate on the value of :meth:`__unicode__` - instead of the immutable ``self`` like the regular ``str`` type. + To use this class, inherit from it and override the :meth:`__str__` method + to return the string representation of the object. The various string + methods will operate on the value of :meth:`__str__` instead of the + immutable ``self`` like the regular ``str`` type. """ def __str__(self): - return self.__unicode__() + raise NotImplementedError() def __bytes__(self): - return bytes(self.__unicode__(), getdefaultencoding()) - - def __unicode__(self): - raise NotImplementedError() + return bytes(self.__str__(), getdefaultencoding()) def __repr__(self): - return repr(self.__unicode__()) + return repr(self.__str__()) def __lt__(self, other): - return self.__unicode__() < other + return self.__str__() < other def __le__(self, other): - return self.__unicode__() <= other + return self.__str__() <= other def __eq__(self, other): - return self.__unicode__() == other + return self.__str__() == other def __ne__(self, other): - return self.__unicode__() != other + return self.__str__() != other def __gt__(self, other): - return self.__unicode__() > other + return self.__str__() > other def __ge__(self, other): - return self.__unicode__() >= other + return self.__str__() >= other def __bool__(self): - return bool(self.__unicode__()) + return bool(self.__str__()) def __len__(self): - return len(self.__unicode__()) + return len(self.__str__()) def __iter__(self): - yield from self.__unicode__() + yield from self.__str__() def __getitem__(self, key): - return self.__unicode__()[key] + return self.__str__()[key] def __reversed__(self): - return reversed(self.__unicode__()) + return reversed(self.__str__()) def __contains__(self, item): - 
return str(item) in self.__unicode__() + return str(item) in self.__str__() def __getattr__(self, attr): if not hasattr(str, attr): raise AttributeError("{!r} object has no attribute {!r}".format( type(self).__name__, attr)) - return getattr(self.__unicode__(), attr) + return getattr(self.__str__(), attr) maketrans = str.maketrans # Static method can't rely on __getattr__ diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 9e5e14b..8fa3a96 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,48 +24,47 @@ This module contains accessory functions for other parts of the library. Parser users generally won't need stuff from here. """ - -from .nodes import Node -from .smart_list import SmartList - __all__ = ["parse_anything"] def parse_anything(value, context=0, skip_style_tags=False): """Return a :class:`.Wikicode` for *value*, allowing multiple types. This differs from :meth:`.Parser.parse` in that we accept more than just a - string to be parsed. Unicode objects (strings in py3k), strings (bytes in - py3k), integers (converted to strings), ``None``, existing :class:`.Node` - or :class:`.Wikicode` objects, as well as an iterable of these types, are - supported. This is used to parse input on-the-fly by various methods of - :class:`.Wikicode` and others like :class:`.Template`, such as - :meth:`wikicode.insert() <.Wikicode.insert>` or setting - :meth:`template.name <.Template.name>`. + string to be parsed. Strings, bytes, integers (converted to strings), + ``None``, existing :class:`.Node` or :class:`.Wikicode` objects, as well + as an iterable of these types, are supported. 
This is used to parse input + on-the-fly by various methods of :class:`.Wikicode` and others like + :class:`.Template`, such as :meth:`wikicode.insert() <.Wikicode.insert>` + or setting :meth:`template.name <.Template.name>`. Additional arguments are passed directly to :meth:`.Parser.parse`. """ + # pylint: disable=cyclic-import,import-outside-toplevel + from .nodes import Node from .parser import Parser + from .smart_list import SmartList from .wikicode import Wikicode if isinstance(value, Wikicode): return value - elif isinstance(value, Node): + if isinstance(value, Node): return Wikicode(SmartList([value])) - elif isinstance(value, str): + if isinstance(value, str): return Parser().parse(value, context, skip_style_tags) - elif isinstance(value, bytes): + if isinstance(value, bytes): return Parser().parse(value.decode("utf8"), context, skip_style_tags) - elif isinstance(value, int): + if isinstance(value, int): return Parser().parse(str(value), context, skip_style_tags) - elif value is None: + if value is None: return Wikicode(SmartList()) - elif hasattr(value, "read"): + if hasattr(value, "read"): return parse_anything(value.read(), context, skip_style_tags) try: nodelist = SmartList() for item in value: nodelist += parse_anything(item, context, skip_style_tags).nodes return Wikicode(nodelist) - except TypeError: - error = "Needs string, Node, Wikicode, file, int, None, or iterable of these, but got {0}: {1}" - raise ValueError(error.format(type(value).__name__, value)) + except TypeError as exc: + raise ValueError(f"Needs string, Node, Wikicode, file, int, None, or " + f"iterable of these, but got {type(value).__name__}: " + f"{value}") from exc diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f72c26b..381d938 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, 
to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,7 @@ from itertools import chain from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Node, Tag, Template, Text, Wikilink) -from .smart_list.ListProxy import _ListProxy +from .smart_list.list_proxy import ListProxy from .string_mixin import StringMixIn from .utils import parse_anything @@ -48,7 +48,7 @@ class Wikicode(StringMixIn): super().__init__() self._nodes = nodes - def __unicode__(self): + def __str__(self): return "".join([str(node) for node in self.nodes]) @staticmethod @@ -108,7 +108,7 @@ class Wikicode(StringMixIn): def _is_child_wikicode(self, obj, recursive=True): """Return whether the given :class:`.Wikicode` is a descendant.""" def deref(nodes): - if isinstance(nodes, _ListProxy): + if isinstance(nodes, ListProxy): return nodes._parent # pylint: disable=protected-access return nodes @@ -249,12 +249,12 @@ class Wikicode(StringMixIn): make_filter = lambda ftype: (lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)) for name, ftype in meths.items(): - ifilter = make_ifilter(ftype) - filter = make_filter(ftype) - ifilter.__doc__ = doc.format(name, "ifilter", ftype) - filter.__doc__ = doc.format(name, "filter", ftype) - setattr(cls, "ifilter_" + name, ifilter) - setattr(cls, "filter_" + name, filter) + ifilt = make_ifilter(ftype) + filt = make_filter(ftype) + ifilt.__doc__ = doc.format(name, "ifilter", ftype) + filt.__doc__ = doc.format(name, "filter", ftype) + setattr(cls, "ifilter_" + name, ifilt) + setattr(cls, "filter_" + name, filt) @property def nodes(self): @@ -351,6 +351,7 @@ class Wikicode(StringMixIn): ancestors = _get_ancestors(code, needle) if ancestors is not None: return [node] + ancestors + return None if isinstance(obj, Wikicode): obj = obj.get(0) @@ -443,13 +444,13 @@ class Wikicode(StringMixIn): """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, 
recursive) - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) context.insert(index.start, value) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) context.insert(index.start, value) else: @@ -478,12 +479,12 @@ class Wikicode(StringMixIn): """ if isinstance(obj, (Node, Wikicode)): context, index = self._do_strong_search(obj, recursive) - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) else: for exact, context, index in self._do_weak_search(obj, recursive): if exact: - for i in range(index.start, index.stop): + for _ in range(index.start, index.stop): context.nodes.pop(index.start) else: self._slice_replace(context, index, str(obj), "") @@ -645,8 +646,7 @@ class Wikicode(StringMixIn): while "\n\n\n" in stripped: stripped = stripped.replace("\n\n\n", "\n\n") return stripped - else: - return "".join(nodes) + return "".join(nodes) def get_tree(self): """Return a hierarchical tree representation of the object. diff --git a/scripts/memtest.py b/scripts/memtest.py index f60e260..3da1fcc 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,10 @@ # SOFTWARE. """ -Tests for memory leaks in the CTokenizer. Python 2 and 3 compatible. +Tests for memory leaks in the CTokenizer. This appears to work mostly fine under Linux, but gives an absurd number of -false positives on OS X. I'm not sure why. Running the tests multiple times +false positives on macOS. I'm not sure why. 
Running the tests multiple times yields different results (tests don't always leak, and the amount they leak by varies). Increasing the number of loops results in a smaller bytes/loop value, too, indicating the increase in memory usage might be due to something else. @@ -32,7 +32,6 @@ Actual memory leaks typically leak very large amounts of memory (megabytes) and scale with the number of loops. """ -from __future__ import unicode_literals, print_function from locale import LC_ALL, setlocale from multiprocessing import Process, Pipe from os import listdir, path @@ -42,19 +41,16 @@ import psutil from mwparserfromhell.parser._tokenizer import CTokenizer -if sys.version_info[0] == 2: - range = xrange - LOOPS = 10000 -class Color(object): +class Color: GRAY = "\x1b[30;1m" GREEN = "\x1b[92m" YELLOW = "\x1b[93m" RESET = "\x1b[0m" -class MemoryTest(object): +class MemoryTest: """Manages a memory test.""" def __init__(self): @@ -151,13 +147,13 @@ class MemoryTest(object): def _runner(text, child): r1, r2 = range(250), range(LOOPS) - for i in r1: + for _ in r1: CTokenizer().tokenize(text) child.send("OK") child.recv() child.send("OK") child.recv() - for i in r2: + for _ in r2: CTokenizer().tokenize(text) child.send("OK") child.recv() diff --git a/setup.py b/setup.py index d404ead..6fee34a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #! 
/usr/bin/env python # -# Copyright (C) 2012-2018 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index f61cb10..2629671 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,6 @@ from mwparserfromhell.parser.builder import Builder class _TestParseError(Exception): """Raised internally when a test could not be parsed.""" - pass class TokenizerTestCase: @@ -41,7 +40,7 @@ class TokenizerTestCase: """ @staticmethod - def _build_test_method(funcname, data): + def _build_test_method(data): """Create and return a method to be treated as a test case method. 
*data* is a dict containing multiple keys: the *input* text to be @@ -79,7 +78,7 @@ class TokenizerTestCase: try: data["output"] = eval(raw, vars(tokens)) except Exception as err: - raise _TestParseError(err) + raise _TestParseError(err) from err @classmethod def _load_tests(cls, filename, name, text, restrict=None): @@ -115,7 +114,7 @@ class TokenizerTestCase: continue fname = "test_{}{}_{}".format(name, number, data["name"]) - meth = cls._build_test_method(fname, data) + meth = cls._build_test_method(data) setattr(cls, fname, meth) @classmethod diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index cdfbd3a..407711e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,6 @@ from unittest import TestCase from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) -from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode diff --git a/tests/test_argument.py b/tests/test_argument.py index eaf8abe..110436a 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" - def test_unicode(self): - """test Argument.__unicode__()""" + def test_str(self): + """test Argument.__str__()""" node 
= Argument(wraptext("foobar")) self.assertEqual("{{{foobar}}}", str(node)) node2 = Argument(wraptext("foo"), wraptext("bar")) diff --git a/tests/test_attribute.py b/tests/test_attribute.py index b0d0e85..a8be214 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2019 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,8 +29,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestAttribute(TreeEqualityTestCase): """Test cases for the Attribute node extra.""" - def test_unicode(self): - """test Attribute.__unicode__()""" + def test_str(self): + """test Attribute.__str__()""" node = Attribute(wraptext("foo")) self.assertEqual(" foo", str(node)) node2 = Attribute(wraptext("foo"), wraptext("bar")) diff --git a/tests/test_comment.py b/tests/test_comment.py index 1024e60..60cbba6 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase class TestComment(TreeEqualityTestCase): """Test cases for the Comment node.""" - def test_unicode(self): - """test Comment.__unicode__()""" + def test_str(self): + """test Comment.__str__()""" node = Comment("foobar") self.assertEqual("", str(node)) diff --git a/tests/test_docs.py b/tests/test_docs.py index 2e78106..101a347 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of 
this software and associated documentation files (the "Software"), to deal @@ -31,10 +31,10 @@ import mwparserfromhell class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" - def assertPrint(self, input, output): - """Assertion check that *input*, when printed, produces *output*.""" + def assertPrint(self, value, output): + """Assertion check that *value*, when printed, produces *output*.""" buff = StringIO() - print(input, end="", file=buff) + print(value, end="", file=buff) buff.seek(0) self.assertEqual(output, buff.read()) diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 48a7b82..1323109 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestExternalLink(TreeEqualityTestCase): """Test cases for the ExternalLink node.""" - def test_unicode(self): - """test ExternalLink.__unicode__()""" + def test_str(self): + """test ExternalLink.__str__()""" node = ExternalLink(wraptext("http://example.com/"), brackets=False) self.assertEqual("http://example.com/", str(node)) node2 = ExternalLink(wraptext("http://example.com/")) diff --git a/tests/test_heading.py b/tests/test_heading.py index 46c6258..a031332 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class 
TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" - def test_unicode(self): - """test Heading.__unicode__()""" + def test_str(self): + """test Heading.__str__()""" node = Heading(wraptext("foobar"), 2) self.assertEqual("==foobar==", str(node)) node2 = Heading(wraptext(" zzz "), 5) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 273ee21..d3a9bd2 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,13 +23,13 @@ import unittest from mwparserfromhell.nodes import HTMLEntity -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase class TestHTMLEntity(TreeEqualityTestCase): """Test cases for the HTMLEntity node.""" - def test_unicode(self): - """test HTMLEntity.__unicode__()""" + def test_str(self): + """test HTMLEntity.__str__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("6b", named=False, hexadecimal=True) diff --git a/tests/test_parameter.py b/tests/test_parameter.py index d53c7af..3d0028e 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,16 +21,15 @@ import unittest -from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, wraptext class 
TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" - def test_unicode(self): - """test Parameter.__unicode__()""" + def test_str(self): + """test Parameter.__str__()""" node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) self.assertEqual("foo", str(node)) node2 = Parameter(wraptext("foo"), wraptext("bar")) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 16d99e7..58b327a 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,11 +22,11 @@ import unittest from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.smart_list.ListProxy import _ListProxy +from mwparserfromhell.smart_list.list_proxy import ListProxy class TestSmartList(unittest.TestCase): - """Test cases for the SmartList class and its child, _ListProxy.""" + """Test cases for the SmartList class and its child, ListProxy.""" def _test_get_set_del_item(self, builder): """Run tests on __get/set/delitem__ of a list built with *builder*.""" @@ -178,7 +178,7 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] - for i in range(len(list1)): + for _ in range(len(list1)): out.append(next(gen1)) self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) @@ -260,7 +260,8 @@ class TestSmartList(unittest.TestCase): list3.sort(key=lambda i: i[1], reverse=True) self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) - def _dispatch_test_for_children(self, meth): + @staticmethod + def _dispatch_test_for_children(meth): """Run a test method on various different types of children.""" meth(lambda L: SmartList(list(L))[:]) meth(lambda L: SmartList([999] + list(L))[1:]) @@ -268,13 +269,13 @@ class 
TestSmartList(unittest.TestCase): meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]) def test_docs(self): - """make sure the methods of SmartList/_ListProxy have docstrings""" + """make sure the methods of SmartList/ListProxy have docstrings""" methods = ["append", "count", "extend", "index", "insert", "pop", "remove", "reverse", "sort"] for meth in methods: expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ - listproxy_doc = getattr(_ListProxy, meth).__doc__ + listproxy_doc = getattr(ListProxy, meth).__doc__ self.assertEqual(expected, smartlist_doc) self.assertEqual(expected, listproxy_doc) @@ -305,19 +306,19 @@ class TestSmartList(unittest.TestCase): self._test_list_methods(SmartList) def test_child_get_set_del(self): - """make sure _ListProxy's getitem/setitem/delitem work""" + """make sure ListProxy's getitem/setitem/delitem work""" self._dispatch_test_for_children(self._test_get_set_del_item) def test_child_add(self): - """make sure _ListProxy's add/radd/iadd work""" + """make sure ListProxy's add/radd/iadd work""" self._dispatch_test_for_children(self._test_add_radd_iadd) def test_child_other_magics(self): - """make sure _ListProxy's other magically implemented features work""" + """make sure ListProxy's other magically implemented features work""" self._dispatch_test_for_children(self._test_other_magic_methods) def test_child_methods(self): - """make sure _ListProxy's non-magic methods work, like append()""" + """make sure ListProxy's non-magic methods work, like append()""" self._dispatch_test_for_children(self._test_list_methods) def test_influence(self): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 673d5fa..aa13f11 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this 
software and associated documentation files (the "Software"), to deal @@ -29,7 +29,7 @@ class _FakeString(StringMixIn): def __init__(self, data): self._data = data - def __unicode__(self): + def __str__(self): return self._data @@ -128,7 +128,7 @@ class TestStringMixIn(unittest.TestCase): self.assertIsInstance(gen2, GeneratorType) out = [] - for i in range(len(str1)): + for _ in range(len(str1)): out.append(next(gen1)) self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) diff --git a/tests/test_tag.py b/tests/test_tag.py index 860a94b..1fb82e9 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,8 +34,8 @@ agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) class TestTag(TreeEqualityTestCase): """Test cases for the Tag node.""" - def test_unicode(self): - """test Tag.__unicode__()""" + def test_str(self): + """test Tag.__str__()""" node1 = Tag(wraptext("ref")) node2 = Tag(wraptext("span"), wraptext("foo"), [agen("style", "color: red;")]) @@ -227,7 +227,7 @@ class TestTag(TreeEqualityTestCase): node.wiki_markup = "{" self.assertEqual("{|\n{", node) node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|") - self.assertEqual("|", node.wiki_style_separator) + self.assertEqual("|", node2.wiki_style_separator) def test_closing_wiki_markup(self): """test getter/setter for closing_wiki_markup attribute""" diff --git a/tests/test_template.py b/tests/test_template.py index 461371d..34dd32d 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2017 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software 
and associated documentation files (the "Software"), to deal @@ -34,8 +34,8 @@ pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) class TestTemplate(TreeEqualityTestCase): """Test cases for the Template node.""" - def test_unicode(self): - """test Template.__unicode__()""" + def test_str(self): + """test Template.__str__()""" node = Template(wraptext("foobar")) self.assertEqual("{{foobar}}", str(node)) node2 = Template(wraptext("foo"), diff --git a/tests/test_text.py b/tests/test_text.py index 94da937..a54311a 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,8 +26,8 @@ from mwparserfromhell.nodes import Text class TestText(unittest.TestCase): """Test cases for the Text node.""" - def test_unicode(self): - """test Text.__unicode__()""" + def test_str(self): + """test Text.__str__()""" node = Text("foobar") self.assertEqual("foobar", str(node)) node2 = Text("fóóbar") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 6ce28b5..1f6c02a 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -61,12 +61,9 @@ class TestTokens(unittest.TestCase): hundredchars = ("earwig" * 100)[:97] + "..." 
self.assertEqual("Token()", repr(token1)) - token2repr1 = "Token(foo='bar', baz=123)" - token2repr2 = "Token(baz=123, foo='bar')" - token3repr = "Text(text='" + hundredchars + "')" - token2repr = repr(token2) - self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) - self.assertEqual(token3repr, repr(token3)) + self.assertTrue(repr(token2) in ( + "Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')")) + self.assertEqual("Text(text='" + hundredchars + "')", repr(token3)) def test_equality(self): """check that equivalent tokens are considered equal""" diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 9701865..0188ad0 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,8 +24,7 @@ import re from types import GeneratorType import unittest -from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, - Node, Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes import Argument, Heading, Template, Text from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse @@ -35,8 +34,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestWikicode(TreeEqualityTestCase): """Tests for the Wikicode class, which manages a list of nodes.""" - def test_unicode(self): - """test Wikicode.__unicode__()""" + def test_str(self): + """test Wikicode.__str__()""" code1 = parse("foobar") code2 = parse("Have a {{template}} and a [[page|link]]") self.assertEqual("foobar", str(code1)) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 1865b6e..597c18f 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -1,5 +1,5 @@ # -# Copyright (C) 
2012-2016 Ben Kurtovic +# Copyright (C) 2012-2020 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" - def test_unicode(self): - """test Wikilink.__unicode__()""" + def test_str(self): + """test Wikilink.__str__()""" node = Wikilink(wraptext("foobar")) self.assertEqual("[[foobar]]", str(node)) node2 = Wikilink(wraptext("foo"), wraptext("bar"))